From 6752e5882d9e5706cc0c73563ec8e3204b86312b Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Sun, 22 Jun 2025 13:27:28 +1200 Subject: [PATCH 01/22] rm SymbolicRegression from registry; add MLJTransforms; @update --- src/registry/Metadata.toml | 448 +++++++++++++++++++++---------------- src/registry/Models.toml | 4 +- src/registry/Project.toml | 2 +- 3 files changed, 263 insertions(+), 191 deletions(-) diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index 42bd9b2..bb526b3 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -542,9 +542,9 @@ [BetaML.NeuralNetworkClassifier] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Count, ScientificTypesBase.Finite}}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Count, ScientificTypesBase.Finite}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":target_in_fit" = "`true`" @@ -719,6 +719,222 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" +[MLJTransforms.TargetEncoder] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "MLJTransforms" +":package_license" = "unknown" +":load_path" = "MLJTransforms.TargetEncoder" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nTargetEncoder\n```\n\nA model type for constructing a target encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTargetEncoder = @load TargetEncoder pkg=MLJTransforms\n```\n\nDo `model = TargetEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `TargetEncoder(features=...)`.\n\n`TargetEncoder` implements target encoding as defined in [1] to encode categorical variables into continuous ones using statistics from the target variable.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous` or `Count` for regression problems and `Multiclass` or `OrderedFactor` for classification problems; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n * `λ`: Shrinkage hyperparameter used to mix between posterior and prior statistics as described in [1]\n * `m`: An integer hyperparameter to compute shrinkage as described in [1]. If `m=:auto` then `m` will be computed using empirical Bayes estimation as described in [1] (see the sketch below)\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply target encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `task`: Whether the task is `Classification` or `Regression`\n * `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical feature. It maps each level in each categorical feature to a statistic computed over the target.
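The shrinkage blend from [1] that `λ` and `m` control mixes, for each level of a categorical feature, the target mean within that level (the posterior) with the global target mean (the prior), weighting the posterior more heavily for frequent levels. A minimal sketch, assuming the classical `n/(n + m)` shrinkage form from [1]; `encode_level` is a hypothetical helper, not part of MLJTransforms, and the package's exact use of `λ` and `m` may differ:

```julia
using Statistics

# Hedged sketch of the posterior/prior blend from [1]; not MLJTransforms source code.
function encode_level(y_level, y_all, m)
    n = length(y_level)  # frequency of this category level
    α = n / (n + m)      # shrinkage: trust the level's own mean more as n grows
    return α * mean(y_level) + (1 - α) * mean(y_all)
end

encode_level([1.0, 1.0, 0.0], [1.0, 1.0, 0.0, 0.0, 0.0, 1.0], 2)  # 0.6
```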
\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Define the target variable \ny = [\"c1\", \"c2\", \"c3\", \"c1\", \"c2\",]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\ny = coerce(y, Multiclass)\n\nencoder = TargetEncoder(ordered_factor = false, lambda = 1.0, m = 0,)\nmach = fit!(machine(encoder, X, y))\nXnew = transform(mach, X)\n\njulia> schema(Xnew)\n┌───────┬──────────────────┬─────────────────────────────────┐\n│ names │ scitypes │ types │\n├───────┼──────────────────┼─────────────────────────────────┤\n│ A_1 │ Continuous │ Float64 │\n│ A_2 │ Continuous │ Float64 │\n│ A_3 │ Continuous │ Float64 │\n│ B │ Continuous │ Float64 │\n│ C_1 │ Continuous │ Float64 │\n│ C_2 │ Continuous │ Float64 │\n│ C_3 │ Continuous │ Float64 │\n│ D_1 │ Continuous │ Float64 │\n│ D_2 │ Continuous │ Float64 │\n│ D_3 │ Continuous │ Float64 │\n│ E │ OrderedFactor{5} │ CategoricalValue{Int64, UInt32} │\n└───────┴──────────────────┴─────────────────────────────────┘\n```\n\n# Reference\n\n[1] Micci-Barreca, Daniele. “A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems.” SIGKDD Explor. Newsl. 
3, 1 (July 2001), 27–32.\n\nSee also [`OneHotEncoder`](@ref)\n""" +":name" = "TargetEncoder" +":human_name" = "target encoder" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :lambda, :m)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Real\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJTransforms.MissingnessEncoder] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`false`" +":is_pure_julia" = "`true`" +":package_name" = "MLJTransforms" +":package_license" = "unknown" +":load_path" = "MLJTransforms.MissingnessEncoder" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nMissingnessEncoder\n```\n\nA model type for constructing a missingness encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMissingnessEncoder = @load MissingnessEncoder pkg=MLJTransforms\n```\n\nDo `model = MissingnessEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MissingnessEncoder(features=...)`.\n\n`MissingnessEncoder` maps any missing level of a categorical feature into a new level (e.g., \"Missing\"). In this way, missingness will be treated as a new level by any subsequent model. This assumes that the categorical features have raw types that are in `Char`, `AbstractString`, and `Number`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n * `label_for_missing::Dict{<:Type, <:Any} = Dict( AbstractString => \"missing\", Char => 'm', )`: A dictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and where each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then missing values will be replaced with `\"missing\"` and if the raw type subtypes `Char` then the new value is `'m'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply missingness encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `label_for_missing_given_feature`: A dictionary that, for each column, maps `missing` into some value according to `label_for_missing`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define a table with missing values\nXm = (\n A = categorical([\"Ben\", \"John\", missing, missing, \"Mary\", \"John\", missing]),\n B = [1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = categorical([7, 5, missing, missing, 10, 0, missing]),\n D = [23, 23, 44, 66, 14, 23, 11],\n E = categorical([missing, 'g', 'r', missing, 'r', 'g', 'p'])\n)\n\nencoder = MissingnessEncoder()\nmach = fit!(machine(encoder, Xm))\nXnew = transform(mach, Xm)\n\njulia> Xnew\n(A = [\"Ben\", \"John\", \"missing\", \"missing\", \"Mary\", \"John\", \"missing\"],\n B = Union{Missing, Float64}[1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = [7, 5, -1, -1, 10, 0, -1],\n D = [23, 23, 44, 66, 14, 23, 11],\n E = ['m', 'g', 'r', 'm', 'r', 'g', 'p'],)\n\n```\n\nSee also [`CardinalityReducer`](@ref)\n""" +":name" = "MissingnessEncoder" +":human_name" = "missingness encoder" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :label_for_missing)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Dict{T} where T<:Type\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJTransforms.ContrastEncoder] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`false`" +":is_pure_julia" = "`true`" +":package_name" = "MLJTransforms" +":package_license" = "unknown" +":load_path" = "MLJTransforms.ContrastEncoder" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nContrastEncoder\n```\n\nA model type for constructing a contrast encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContrastEncoder = @load ContrastEncoder pkg=MLJTransforms\n```\n\nDo `model = ContrastEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContrastEncoder(features=...)`.\n\n`ContrastEncoder` implements the following contrast encoding methods for categorical features: dummy, sum, backward/forward difference, and Helmert coding. More generally, users can specify a custom contrast or hypothesis matrix, and each feature can be encoded using a different method.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * `mode=:dummy`: The type of encoding to use. Can be one of `:contrast`, `:dummy`, `:sum`, `:backward_diff`, `:forward_diff`, `:helmert` or `:hypothesis`.\n\nIf `ignore=false` (features to be encoded are listed explictly in `features`), then this can be a vector of the same length as `features` to specify a different contrast encoding scheme for each feature\n\n * `buildmatrix=nothing`: A function or other callable with signature `buildmatrix(colname, k)`,\n\nwhere `colname` is the name of the feature levels and `k` is it's length, and which returns contrast or hypothesis matrix with row/column ordering consistent with the ordering of `levels(col)`. Only relevant if `mode` is `:contrast` or `:hypothesis`.\n\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply contrast encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. 
\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vector_given_value_given_feature`: A dictionary that maps each level for each column in a subset of the categorical features of X into the corresponding contrast vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical dataset\nX = (\n name = categorical([\"Ben\", \"John\", \"Mary\", \"John\"]),\n height = [1.85, 1.67, 1.5, 1.67],\n favnum = categorical([7, 5, 10, 1]),\n age = [23, 23, 14, 23],\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = ContrastEncoder(\n features = [:name, :favnum],\n ignore = false, \n mode = [:dummy, :helmert],\n)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n (name_John = [1.0, 0.0, 0.0, 0.0],\n name_Mary = [0.0, 1.0, 0.0, 1.0],\n height = [1.85, 1.67, 1.5, 1.67],\n favnum_5 = [0.0, 1.0, 0.0, -1.0],\n favnum_7 = [2.0, -1.0, 0.0, -1.0],\n favnum_10 = [-1.0, -1.0, 3.0, -1.0],\n age = [23, 23, 14, 23],)\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" +":name" = "ContrastEncoder" +":human_name" = "contrast encoder" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore, :mode, :buildmatrix, :ordered_factor)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Symbol, AbstractVector{Symbol}}\", \"Any\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJTransforms.FrequencyEncoder] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`false`" +":is_pure_julia" = "`true`" +":package_name" = "MLJTransforms" +":package_license" = "unknown" +":load_path" = "MLJTransforms.FrequencyEncoder" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nFrequencyEncoder\n```\n\nA model type for constructing a frequency encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFrequencyEncoder = @load FrequencyEncoder pkg=MLJTransforms\n```\n\nDo `model = FrequencyEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FrequencyEncoder(features=...)`.\n\n`FrequencyEncoder` implements frequency encoding which replaces the categorical values in the specified categorical features with their (normalized or raw) frequencies of occurrence in the dataset. 
\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n * `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.\n * `output_type=Float32`: The type of the output values. The default is `Float32`, but you can set it to `Float64` or any other type that can hold the frequency values.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply frequency encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = FrequencyEncoder(ordered_factor = false, normalize=true)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n (A = [2, 1, 2, 2, 2],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [4, 4, 4, 1, 4],\n D = [3, 2, 3, 2, 3],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":name" = "FrequencyEncoder" +":human_name" = "frequency encoder" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :normalize, :output_type)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Bool\", \"Type\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJTransforms.CardinalityReducer] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" = 
"`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`false`" +":is_pure_julia" = "`true`" +":package_name" = "MLJTransforms" +":package_license" = "unknown" +":load_path" = "MLJTransforms.CardinalityReducer" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nCardinalityReducer\n```\n\nA model type for constructing a cardinality reducer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCardinalityReducer = @load CardinalityReducer pkg=MLJTransforms\n```\n\nDo `model = CardinalityReducer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CardinalityReducer(features=...)`.\n\n`CardinalityReducer` maps any level of a categorical feature that occurs with frequency < `min_frequency` into a new level (e.g., \"Other\"). This is useful when some categorical features have high cardinality and many levels are infrequent. This assumes that the categorical features have raw types that are in `Union{AbstractString, Char, Number}`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `min_frequency::Real=3`: Any level of a categorical feature that occurs with frequency < `min_frequency` will be mapped to a new level. Could be\n\nan integer or a float which decides whether raw counts or normalized frequencies are used.\n\n * `label_for_infrequent::Dict{<:Type, <:Any}()= Dict( AbstractString => \"Other\", Char => 'O', )`: A\n\ndictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then the new value is `\"Other\"` and if the raw type subtypes `Char` then the new value is `'O'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `new_cat_given_col_val`: A dictionary that maps each level in a categorical feature to a new level (either itself or the new level specified in `label_for_infrequent`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define categorical features\nA = [ [\"a\" for i in 1:100]..., \"b\", \"b\", \"b\", \"c\", \"d\"]\nB = [ [0 for i in 1:100]..., 1, 2, 3, 4, 4]\n\n# Combine into a named tuple\nX = (A = A, B = B)\n\n# Coerce A and B to multiclass\nX = coerce(X,\n:A => Multiclass,\n:B => Multiclass\n)\n\nencoder = CardinalityReducer(ordered_factor = false, min_frequency=3)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> proportionmap(Xnew.A)\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, Float64} with 3 entries:\n \"Other\" => 0.0190476\n \"b\" => 0.0285714\n \"a\" => 0.952381\n\njulia> proportionmap(Xnew.B)\nDict{CategoricalArrays.CategoricalValue{Int64, UInt32}, Float64} with 2 entries:\n 0 => 0.952381\n -1 => 0.047619\n```\n\nSee also [`FrequencyEncoder`](@ref)\n""" +":name" = "CardinalityReducer" +":human_name" = "cardinality reducer" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :min_frequency, :label_for_infrequent)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Dict{T} where T<:Type\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJTransforms.OrdinalEncoder] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Table`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":target_in_fit" = "`false`" +":is_pure_julia" = "`true`" +":package_name" = "MLJTransforms" +":package_license" = "unknown" +":load_path" = "MLJTransforms.OrdinalEncoder" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nOrdinalEncoder\n```\n\nA model type for constructing an ordinal encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOrdinalEncoder = @load OrdinalEncoder pkg=MLJTransforms\n```\n\nDo `model = OrdinalEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `OrdinalEncoder(features=...)`.\n\n`OrdinalEncoder` implements ordinal encoding which replaces the categorical values in the specified categorical features with integers (ordered arbitrarily). This will create an implicit ordering between categories which may not be a proper modelling assumption.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` or ignore them\n * `output_type`: The numerical concrete type of the encoded features. Default is `Float32`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply ordinal encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical features of X into an integer.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercion:\nschema(X)\n\nencoder = OrdinalEncoder(ordered_factor = false)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n (A = [2, 1, 2, 3, 3],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [1, 1, 1, 2, 1],\n D = [2, 1, 2, 1, 2],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":name" = "OrdinalEncoder" +":human_name" = "ordinal encoder" +":is_supervised" = "`false`" +":prediction_type" = ":unknown" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :output_type)`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Type\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + [CatBoost.CatBoostRegressor] ":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -854,7 +1070,7 @@ ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mode"] ":hyperparameters" = "`(:K, :algorithm, :metric, :leafsize, :reorder, :weights, :output_type)`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\", \"Type{<:Union{AbstractDict{<:AbstractString, <:AbstractVector}, AbstractDict{Symbol, <:AbstractVector}, NamedTuple{names, T} where {N, names, T<:NTuple{N, AbstractVector}}}}\")`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\", \"Type{<:Union{AbstractDict{<:AbstractString, <:AbstractVector}, AbstractDict{Symbol, <:AbstractVector}, NamedTuple{names, T} where {N, names, T<:Tuple{Vararg{AbstractVector, N}}}}}\")`" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" @@ -4626,7 +4842,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nUnion{Types...}\n```\n\nA `Union` type is an abstract type which includes all instances of any of its argument types. This means that `T <: Union{T,S}` and `S <: Union{T,S}`.\n\nLike other abstract types, it cannot be instantiated, even if all of its arguments are non abstract.\n\n# Examples\n\n```jldoctest\njulia> IntOrString = Union{Int,AbstractString}\nUnion{Int64, AbstractString}\n\njulia> 1 isa IntOrString # instance of Int is included in the union\ntrue\n\njulia> \"Hello!\" isa IntOrString # String is also included\ntrue\n\njulia> 1.0 isa IntOrString # Float64 is not included because it is neither Int nor AbstractString\nfalse\n```\n\n# Extended Help\n\nUnlike most other parametric types, unions are covariant in their parameters. For example, `Union{Real, String}` is a subtype of `Union{Number, AbstractString}`.\n\nThe empty union [`Union{}`](@ref) is the bottom type of Julia.\n""" +":docstring" = """```\nUnion{Types...}\n```\n\nA type union is an abstract type which includes all instances of any of its argument types. 
The empty union [`Union{}`](@ref) is the bottom type of Julia.\n\n# Examples\n\n```jldoctest\njulia> IntOrString = Union{Int,AbstractString}\nUnion{Int64, AbstractString}\n\njulia> 1 isa IntOrString\ntrue\n\njulia> \"Hello!\" isa IntOrString\ntrue\n\njulia> 1.0 isa IntOrString\nfalse\n```\n""" ":name" = "Stack" ":human_name" = "probabilistic stack" ":is_supervised" = "`true`" @@ -6156,13 +6372,13 @@ ":constructor" = "`nothing`" [MLJText.TfidfTransformer] -":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" ":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":target_in_fit" = "`false`" ":is_pure_julia" = "`true`" ":package_name" = "MLJText" @@ -6192,13 +6408,13 @@ ":constructor" = "`nothing`" [MLJText.CountTransformer] -":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" ":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} 
where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":target_in_fit" = "`false`" ":is_pure_julia" = "`true`" ":package_name" = "MLJText" @@ -6228,13 +6444,13 @@ ":constructor" = "`nothing`" [MLJText.BM25Transformer] -":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" ":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":target_in_fit" = "`false`" ":is_pure_julia" = "`true`" ":package_name" = "MLJText" @@ -6282,16 +6498,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = 
"""```\nLGBMClassifier\n```\n\nA model type for constructing a LightGBM classifier, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM\n```\n\nDo `model = LGBMClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMClassifier(boosting=...)`.\n\n`LightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of classification tasks.\n\n# Training data In MLJ or MLJBase, bind an instance `model` to data with\n\nmach = machine(model, X, y) \n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * y is a vector of targets whose items are of scitype `Continuous`. Check the scitype with scitype(y).\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\nCurrently, the following parameters and their defaults are supported:\n\n * `boosting::String = \"gbdt\"`,\n * `num_iterations::Int = 100::(_ >= 0)`,\n * `learning_rate::Float64 = 0.1::(_ > 0.)`,\n * `num_leaves::Int = 31::(1 < _ <= 131072)`,\n * `max_depth::Int = -1`,\n * `tree_learner::String = \"serial\"`,\n * `histogram_pool_size::Float64 = -1.0`,\n * `min_data_in_leaf::Int = 20::(_ >= 0)`,\n * `min_sum_hessian_in_leaf::Float64 = 1e-3::(_ >= 0.0)`,\n * `max_delta_step::Float64 = 0.0`,\n * `lambda_l1::Float64 = 0.0::(_ >= 0.0)`,\n * `lambda_l2::Float64 = 0.0::(_ >= 0.0)`,\n * `min_gain_to_split::Float64 = 0.0::(_ >= 0.0)`,\n * `feature_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `feature_fraction_bynode::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `feature_fraction_seed::Int = 2`,\n * `bagging_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `bagging_freq::Int = 0::(_ >= 0)`,\n * `bagging_seed::Int = 3`,\n * `early_stopping_round::Int = 0`,\n * `extra_trees::Bool = false`,\n * `extra_seed::Int = 6`,\n * `max_bin::Int = 255::(_ > 1)`,\n * `bin_construct_sample_cnt = 200000::(_ > 0)`,\n * `drop_rate::Float64 = 0.1::(0.0 <= _ <= 1.0)`,\n * `max_drop::Int = 50`,\n * `skip_drop:: Float64 = 0.5::(0.0 <= _ <= 1)`,\n * `xgboost_dart_mode::Bool = false`,\n * `uniform_drop::Bool = false`,\n * `drop_seed::Int = 4`,\n * `top_rate::Float64 = 0.2::(0.0 <= _ <= 1.0)`,\n * `other_rate::Float64 = 0.1::(0.0 <= _ <= 1.0)`,\n * `min_data_per_group::Int = 100::(_ > 0)`,\n * `max_cat_threshold::Int = 32::(_ > 0)`,\n * `cat_l2::Float64 = 10.0::(_ >= 0)`,\n * `cat_smooth::Float64 = 10.0::(_ >= 0)`,\n * `objective::String = \"multiclass\"`,\n * `categorical_feature::Vector{Int} = Vector{Int}()`,\n * `data_random_seed::Int = 1`,\n * `is_sparse::Bool = true`,\n * `is_unbalance::Bool = false`,\n * `boost_from_average::Bool = true`,\n * `use_missing::Bool = true`,\n * `linear_tree::Bool = false`,\n * `feature_pre_filter::Bool = true`,\n * `metric::Vector{String} = 
[\"none\"]`,\n * `metric_freq::Int = 1::(_ > 0)`,\n * `is_provide_training_metric::Bool = false`,\n * `eval_at::Vector{Int} = Vector{Int}([1, 2, 3, 4, 5])::(all(_ .> 0))`,\n * `num_machines::Int = 1::(_ > 0)`,\n * `num_threads::Int = 0::(_ >= 0)`,\n * `local_listen_port::Int = 12400::(_ > 0)`,\n * `time_out::Int = 120::(_ > 0)`,\n * `machine_list_file::String = \"\"`,\n * `save_binary::Bool = false`,\n * `device_type::String = \"cpu\"`,\n * `gpu_use_dp::Bool = false`,\n * `gpu_platform_id::Int = -1`,\n * `gpu_device_id::Int = -1`,\n * `num_gpu::Int = 1`,\n * `force_col_wise::Bool = false`,\n * `force_row_wise::Bool = false`,\n * `truncate_booster::Bool = true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, contains a `LGBMClassification` object, a `CategoricalArray` of the input class names, and the classifier with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `namedtuple` containing:\n\n * `gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM \n\nX, y = @load_iris \nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMClassifier() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMClassification`](@ref)\n""" +":docstring" = """```\nLGBMClassifier\n```\n\nA model type for constructing a LightGBM classifier, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM\n```\n\nDo `model = LGBMClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMClassifier(objective=...)`.\n\n`LightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of classification tasks.\n\n# Training data In MLJ or MLJBase, bind an instance `model` to data with\n\nmach = machine(model, X, y) \n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * y is a vector of targets whose items are of scitype `Continuous`. 
Check the scitype with scitype(y).\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, contains a `LGBMClassification` object, a `CategoricalArray` of the input class names, and the classifier with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `namedtuple` containing:\n\n * `gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM \n\nX, y = @load_iris \nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMClassifier() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMClassification`](@ref)\n""" ":name" = "LGBMClassifier" ":human_name" = "LightGBM classifier" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":predict", ":update"] -":hyperparameters" = "`(:boosting, :num_iterations, :learning_rate, :num_leaves, :max_depth, :tree_learner, :histogram_pool_size, :min_data_in_leaf, :min_sum_hessian_in_leaf, :max_delta_step, :lambda_l1, :lambda_l2, :min_gain_to_split, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :bagging_fraction, :pos_bagging_fraction, :neg_bagging_fraction, :bagging_freq, :bagging_seed, :early_stopping_round, :extra_trees, :extra_seed, :max_bin, :bin_construct_sample_cnt, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :sigmoid, :objective, :categorical_feature, :data_random_seed, :is_enable_sparse, :is_unbalance, :boost_from_average, :scale_pos_weight, :use_missing, :linear_tree, :feature_pre_filter, :metric, :metric_freq, :is_provide_training_metric, :eval_at, :num_machines, :num_threads, :local_listen_port, :time_out, :machine_list_filename, :save_binary, :device_type, :gpu_use_dp, :gpu_platform_id, :gpu_device_id, :num_gpu, :force_col_wise, :force_row_wise, :truncate_booster)`" -":hyperparameter_types" = "`(\"String\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Vector{Int64}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", 
\"Bool\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"Bool\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:objective, :boosting, :num_iterations, :learning_rate, :num_leaves, :tree_learner, :num_threads, :device_type, :seed, :deterministic, :force_col_wise, :force_row_wise, :histogram_pool_size, :max_depth, :min_data_in_leaf, :min_sum_hessian_in_leaf, :bagging_fraction, :pos_bagging_fraction, :neg_bagging_fraction, :bagging_freq, :bagging_seed, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :extra_trees, :extra_seed, :early_stopping_round, :first_metric_only, :max_delta_step, :lambda_l1, :lambda_l2, :linear_lambda, :min_gain_to_split, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :max_cat_to_onehot, :top_k, :monotone_constraints, :monotone_constraints_method, :monotone_penalty, :feature_contri, :forcedsplits_filename, :refit_decay_rate, :cegb_tradeoff, :cegb_penalty_split, :cegb_penalty_feature_lazy, :cegb_penalty_feature_coupled, :path_smooth, :interaction_constraints, :verbosity, :linear_tree, :max_bin, :max_bin_by_feature, :min_data_in_bin, :bin_construct_sample_cnt, :data_random_seed, :is_enable_sparse, :enable_bundle, :use_missing, :zero_as_missing, :feature_pre_filter, :pre_partition, :two_round, :header, :label_column, :weight_column, :ignore_column, :categorical_feature, :forcedbins_filename, :precise_float_parser, :start_iteration_predict, :num_iteration_predict, :predict_raw_score, :predict_leaf_index, :predict_contrib, :predict_disable_shape_check, :pred_early_stop, :pred_early_stop_freq, :pred_early_stop_margin, :is_unbalance, :scale_pos_weight, :sigmoid, :boost_from_average, :metric, :metric_freq, :is_provide_training_metric, :eval_at, :multi_error_top_k, :auc_mu_weights, :num_machines, :local_listen_port, :time_out, :machine_list_filename, :machines, :gpu_platform_id, :gpu_device_id, :gpu_use_dp, :num_gpu, :truncate_booster)`" +":hyperparameter_types" = "`(\"String\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Vector{Int64}\", \"String\", \"Float64\", \"Vector{Float64}\", \"String\", \"Float64\", \"Float64\", \"Float64\", \"Vector{Float64}\", \"Vector{Float64}\", \"Float64\", 
\"Vector{Vector{Int64}}\", \"Int64\", \"Bool\", \"Int64\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"String\", \"String\", \"String\", \"Vector{Int64}\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Float64\", \"Bool\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Vector{Float64}\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -6318,16 +6534,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLGBMRegressor\n```\n\nA model type for constructing a LightGBM regressor, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM\n```\n\nDo `model = LGBMRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMRegressor(boosting=...)`.\n\nLightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification, regression and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of regression tasks.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with \n\nmach = machine(model, X, y) \n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * y is a vector of targets whose items are of scitype `Continuous`. 
Check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\nCurrently, the following parameters and their defaults are supported:\n\n * `boosting::String = \"gbdt\"`,\n * `num_iterations::Int = 100::(_ >= 0)`,\n * `learning_rate::Float64 = 0.1::(_ > 0.)`,\n * `num_leaves::Int = 31::(1 < _ <= 131072)`,\n * `max_depth::Int = -1`,\n * `tree_learner::String = \"serial\"`,\n * `histogram_pool_size::Float64 = -1.0`,\n * `min_data_in_leaf::Int = 20::(_ >= 0)`,\n * `min_sum_hessian_in_leaf::Float64 = 1e-3::(_ >= 0.0)`,\n * `max_delta_step::Float64 = 0.0`,\n * `lambda_l1::Float64 = 0.0::(_ >= 0.0)`,\n * `lambda_l2::Float64 = 0.0::(_ >= 0.0)`,\n * `min_gain_to_split::Float64 = 0.0::(_ >= 0.0)`,\n * `feature_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `feature_fraction_bynode::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `feature_fraction_seed::Int = 2`,\n * `bagging_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `pos_bagging_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `neg_bagging_fraction::Float64 = 1.0::(0.0 < _ <= 1.0)`,\n * `bagging_freq::Int = 0::(_ >= 0)`,\n * `bagging_seed::Int = 3`,\n * `early_stopping_round::Int = 0`,\n * `extra_trees::Bool = false`,\n * `extra_seed::Int = 6`,\n * `max_bin::Int = 255::(_ > 1)`,\n * `bin_construct_sample_cnt = 200000::(_ > 0)`,\n * `drop_rate::Float64 = 0.1::(0.0 <= _ <= 1.0)`,\n * `max_drop::Int = 50`,\n * `skip_drop:: Float64 = 0.5::(0.0 <= _ <= 1)`,\n * `xgboost_dart_mode::Bool = false`,\n * `uniform_drop::Bool = false`,\n * `drop_seed::Int = 4`,\n * `top_rate::Float64 = 0.2::(0.0 <= _ <= 1.0)`,\n * `other_rate::Float64 = 0.1::(0.0 <= _ <= 1.0)`,\n * `min_data_per_group::Int = 100::(_ > 0)`,\n * `max_cat_threshold::Int = 32::(_ > 0)`,\n * `cat_l2::Float64 = 10.0::(_ >= 0)`,\n * `cat_smooth::Float64 = 10.0::(_ >= 0)`,\n * `objective::String = \"regression\"`,\n * `categorical_feature::Vector{Int} = Vector{Int}()`,\n * `data_random_seed::Int = 1`,\n * `is_sparse::Bool = true`,\n * `is_unbalance::Bool = false`,\n * `boost_from_average::Bool = true`,\n * `scale_pos_weight::Float64 = 1.0`,\n * `use_missing::Bool = true`,\n * `linear_tree::Bool = false`,\n * `feature_pre_filter::Bool = true`,\n * `alpha::Float64 = 0.9::(_ > 0.0 )`,\n * `metric::Vector{String} = [\"l2\"]`,\n * `metric_freq::Int = 1::(_ > 0)`,\n * `is_provide_training_metric::Bool = false`,\n * `eval_at::Vector{Int} = Vector{Int}([1, 2, 3, 4, 5])::(all(_ .> 0))`,\n * `num_machines::Int = 1::(_ > 0)`,\n * `num_threads::Int = 0::(_ >= 0)`,\n * `local_listen_port::Int = 12400::(_ > 0)`,\n * `time_out::Int = 120::(_ > 0)`,\n * `machine_list_file::String = \"\"`,\n * `save_binary::Bool = false`,\n * `device_type::String = \"cpu\"`,\n * `gpu_use_dp::Bool = false`,\n * `gpu_platform_id::Int = -1`,\n * `gpu_device_id::Int = -1`,\n * `num_gpu::Int = 1`,\n * `force_col_wise::Bool = false`,\n * `force_row_wise::Bool = false`,\n * `truncate_booster::Bool = true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, contains a `LGBMRegression` object, an empty vector, and the regressor with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `namedtuple` containing:\n\n * 
`gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM \n\nX, y = @load_boston # a table and a vector \nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMRegressor() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMRegression`](@ref)\n"""
+":docstring" = """```\nLGBMRegressor\n```\n\nA model type for constructing a LightGBM regressor, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM\n```\n\nDo `model = LGBMRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMRegressor(objective=...)`.\n\nLightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification, regression and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of regression tasks.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * `y` is a vector of targets whose items are of scitype `Continuous`. 
Check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, contains a `LGBMRegression` object, an empty vector, and the regressor with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `namedtuple` containing:\n\n * `gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM \n\nX, y = @load_boston # a table and a vector \nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMRegressor() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMRegression`](@ref)\n""" ":name" = "LGBMRegressor" ":human_name" = "LightGBM regressor" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":predict", ":update"] -":hyperparameters" = "`(:boosting, :num_iterations, :learning_rate, :num_leaves, :max_depth, :tree_learner, :histogram_pool_size, :min_data_in_leaf, :min_sum_hessian_in_leaf, :max_delta_step, :lambda_l1, :lambda_l2, :min_gain_to_split, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :bagging_fraction, :bagging_freq, :bagging_seed, :early_stopping_round, :extra_trees, :extra_seed, :max_bin, :bin_construct_sample_cnt, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :objective, :categorical_feature, :data_random_seed, :is_enable_sparse, :is_unbalance, :boost_from_average, :use_missing, :linear_tree, :feature_pre_filter, :alpha, :metric, :metric_freq, :is_provide_training_metric, :eval_at, :num_machines, :num_threads, :local_listen_port, :time_out, :machine_list_filename, :save_binary, :device_type, :gpu_use_dp, :gpu_platform_id, :gpu_device_id, :num_gpu, :force_col_wise, :force_row_wise, :truncate_booster)`" -":hyperparameter_types" = "`(\"String\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"String\", \"Vector{Int64}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"String\", 
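The updated regressor docstring above defers all hyper-parameter detail to the upstream LightGBM documentation. For orientation only, here is a minimal sketch of overriding a few of the registered hyper-parameters and tuning one of them with MLJ's `TunedModel`; the data are synthetic, and the `num_leaves` bounds are illustrative assumptions, not recommendations:

```julia
using MLJ  # assumes LightGBM.jl (the interface package) is installed

LGBMRegressor = @load LGBMRegressor pkg=LightGBM

# synthetic regression data provided by MLJ
X, y = make_regression(200, 5)

# override any of the registered hyper-parameters by keyword
lgb = LGBMRegressor(num_iterations=200, learning_rate=0.05, num_leaves=63)

# tune one hyper-parameter over an illustrative range
r = range(lgb, :num_leaves, lower=15, upper=127)
tuned = TunedModel(
    model=lgb,
    tuning=Grid(resolution=10),
    range=r,
    resampling=CV(nfolds=3),
    measure=rms,
)
mach = machine(tuned, X, y) |> fit!
fitted_params(mach).best_model.num_leaves  # inspect the winning value
```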
\"Bool\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:objective, :boosting, :num_iterations, :learning_rate, :num_leaves, :tree_learner, :num_threads, :device_type, :seed, :deterministic, :force_col_wise, :force_row_wise, :histogram_pool_size, :max_depth, :min_data_in_leaf, :min_sum_hessian_in_leaf, :bagging_fraction, :bagging_freq, :bagging_seed, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :extra_trees, :extra_seed, :early_stopping_round, :first_metric_only, :max_delta_step, :lambda_l1, :lambda_l2, :linear_lambda, :min_gain_to_split, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :max_cat_to_onehot, :top_k, :monotone_constraints, :monotone_constraints_method, :monotone_penalty, :feature_contri, :forcedsplits_filename, :refit_decay_rate, :cegb_tradeoff, :cegb_penalty_split, :cegb_penalty_feature_lazy, :cegb_penalty_feature_coupled, :path_smooth, :interaction_constraints, :verbosity, :linear_tree, :max_bin, :max_bin_by_feature, :min_data_in_bin, :bin_construct_sample_cnt, :data_random_seed, :is_enable_sparse, :enable_bundle, :use_missing, :zero_as_missing, :feature_pre_filter, :pre_partition, :two_round, :header, :label_column, :weight_column, :ignore_column, :categorical_feature, :forcedbins_filename, :precise_float_parser, :start_iteration_predict, :num_iteration_predict, :predict_raw_score, :predict_leaf_index, :predict_contrib, :predict_disable_shape_check, :is_unbalance, :boost_from_average, :reg_sqrt, :alpha, :fair_c, :poisson_max_delta_step, :tweedie_variance_power, :metric, :metric_freq, :is_provide_training_metric, :eval_at, :num_machines, :local_listen_port, :time_out, :machine_list_filename, :machines, :gpu_platform_id, :gpu_device_id, :gpu_use_dp, :num_gpu, :truncate_booster)`" +":hyperparameter_types" = "`(\"String\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Vector{Int64}\", \"String\", \"Float64\", \"Vector{Float64}\", \"String\", \"Float64\", \"Float64\", \"Float64\", \"Vector{Float64}\", \"Vector{Float64}\", \"Float64\", \"Vector{Vector{Int64}}\", \"Int64\", \"Bool\", \"Int64\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"String\", \"String\", \"String\", \"Vector{Int64}\", \"String\", \"Bool\", \"Int64\", 
\"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Bool\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -6541,9 +6757,9 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6556,7 +6772,7 @@ ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, 
Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Continuous}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":target_in_fit" = "`true`" @@ -6577,9 +6793,9 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6592,7 +6808,7 @@ ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = 
"`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Continuous}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":target_in_fit" = "`true`" @@ -6613,9 +6829,9 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6649,9 +6865,9 @@ ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6664,7 +6880,7 @@ ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Count}`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Count}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Count}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Count}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":target_in_fit" = "`true`" @@ -6685,9 +6901,9 @@ ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6695,150 +6911,6 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[SymbolicRegression.SRTestRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, 
AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "SymbolicRegression" -":package_license" = "Apache-2.0" -":load_path" = "SymbolicRegression.MLJInterfaceModule.SRTestRegressor" -":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" -":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nSRTestRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSRTestRegressor = @load SRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRTestRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`SRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}\n alpha: Float64 1.0\n scaled: Bool false\n, LineSearches.BackTracking{Float64, Int64}\n c_1: Float64 0.0001\n ρ_hi: Float64 0.5\n ρ_lo: Float64 0.1\n iterations: Int64 1000\n order: Int64 3\n maxstep: Float64 Inf\n cache: Nothing nothing\n, nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- `optimizer_options = nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.TTY(RawFD(9) paused, 0 bytes waiting)`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}`\n\n""" 
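Because this patch removes the SymbolicRegression entries from the registry metadata, `@load ... pkg=SymbolicRegression` will no longer resolve against this file. A minimal sketch of the direct route, assuming SymbolicRegression.jl remains installed and continues to export its MLJ interface types (`SRRegressor` and friends):

```julia
using MLJ
using SymbolicRegression  # assumed installed; exports SRRegressor directly

# construct the model without going through the registry's @load
model = SRRegressor(
    binary_operators=[+, -, *],
    unary_operators=[cos],
    niterations=30,
)

X = (a = rand(100), b = rand(100))
y = @. 2 * cos(23.5 * X.a) - X.b^2
mach = machine(model, X, y) |> fit!

# the report carries the discovered expressions (field name per
# SymbolicRegression.jl's own MLJ documentation)
report(mach).equation_strings
```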
-":name" = "SRTestRegressor" -":human_name" = "Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, 
Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[SymbolicRegression.MultitargetSRTestRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "SymbolicRegression" -":package_license" 
= "Apache-2.0" -":load_path" = "SymbolicRegression.MLJInterfaceModule.MultitargetSRTestRegressor" -":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" -":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nMultitargetSRTestRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultitargetSRTestRegressor = @load MultitargetSRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRTestRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`MultitargetSRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}\n alpha: Float64 1.0\n scaled: Bool false\n, LineSearches.BackTracking{Float64, Int64}\n c_1: Float64 0.0001\n ρ_hi: Float64 0.5\n ρ_lo: Float64 0.1\n iterations: Int64 1000\n order: Int64 3\n maxstep: Float64 Inf\n cache: Nothing nothing\n, nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- `optimizer_options = nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.TTY(RawFD(9) paused, 0 bytes waiting)`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- 
`save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}`\n\n""" -":name" = "MultitargetSRTestRegressor" -":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, 
AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[SymbolicRegression.MultitargetSRRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, 
Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "SymbolicRegression" -":package_license" = "Apache-2.0" -":load_path" = "SymbolicRegression.MLJInterfaceModule.MultitargetSRRegressor" -":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" -":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nMultitargetSRRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetSRRegressor(defaults=...)`.\n\nMulti-target Symbolic Regression regressor (`MultitargetSRRegressor`) conducts several searches for expressions that predict each target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype\n\n`Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n\n * `y` is the target, which can be any table of target variables whose element scitype is `Continuous`; check the scitype with `schema(y)`. Units in columns of `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. The same weights are used for all targets.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of lists of trained models. 
The models chosen from each of these lists is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. 
For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. 
*Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. 
This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. 
By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and you are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select an expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above.
By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity. `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Vector{Int}}`: The complexity of each expression in each Pareto frontier.\n * `losses::Vector{Vector{L}}`: The loss of each expression in each Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{Vector{L}}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\nX = (a=rand(100), b=rand(100), c=rand(100))\nY = (y1=(@. cos(X.c) * 2.1 - 0.9), y2=(@. 
X.a * X.b + X.c))\nmodel = MultitargetSRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, Y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equations used:\nr = report(mach)\nfor (output_index, (eq, i)) in enumerate(zip(r.equation_strings, r.best_idx))\n println(\"Equation used for \", output_index, \": \", eq[i])\nend\n```\n\nSee also [`SRRegressor`](@ref).\n""" -":name" = "MultitargetSRRegressor" -":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, 
Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[SymbolicRegression.SRRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = 
"`true`" -":is_pure_julia" = "`true`" -":package_name" = "SymbolicRegression" -":package_license" = "Apache-2.0" -":load_path" = "SymbolicRegression.MLJInterfaceModule.SRRegressor" -":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" -":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(defaults=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. 
For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). 
You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. 
If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. 
Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and you are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument.
For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select an expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function.
Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n""" -":name" = "SRRegressor" -":human_name" = "Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file,
:bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - [MLJModels.ConstantClassifier] ":input_scitype" = "`ScientificTypesBase.Table`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -7038,7 +7110,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=MLJModels\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `ordered_factors=false` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping columns) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" +":docstring" = """```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=unknown\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `one_hot_ordered_factors=true` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping columns) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table.
Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" ":name" = "ContinuousEncoder" ":human_name" = "continuous encoder" ":is_supervised" = "`false`" @@ -7110,7 +7182,7 @@ ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nInteractionTransformer\n```\n\nA model type for constructing an interaction transformer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=MLJModels\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions.
If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interaction generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n"""
+":docstring" = """```\nInteractionTransformer\n```\n\nA model type for constructing an interaction transformer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=unknown\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. 
If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interaction generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n"""
":name" = "InteractionTransformer"
":human_name" = "interaction transformer"
":is_supervised" = "`false`"
@@ -7254,7 +7326,7 @@
":supports_weights" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = """```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=MLJModels\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. 
Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (its fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n"""
+":docstring" = """```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=unknown\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose features each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. 
Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (its fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n"""
":name" = "FillImputer"
":human_name" = "fill imputer"
":is_supervised" = "`false`"
@@ -8788,7 +8860,7 @@
":output_scitype" = "`ScientificTypesBase.Unknown`"
":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Multiclass}`"
":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Image}, AbstractVector{<:ScientificTypesBase.Multiclass}}`"
-":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Multiclass}`"
+":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Multiclass}}`"
":transform_scitype" = "`ScientificTypesBase.Unknown`"
":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
":target_in_fit" = "`true`"
diff --git a/src/registry/Models.toml b/src/registry/Models.toml
index 76ddb7e..af85918 100644
--- a/src/registry/Models.toml
+++ b/src/registry/Models.toml
@@ -1,4 +1,5 @@
BetaML = ["RandomForestRegressor", "GaussianMixtureImputer", "RandomForestClassifier", "RandomForestImputer", "PerceptronClassifier", "AutoEncoder", "DecisionTreeRegressor", "PegasosClassifier", "KMeansClusterer", "NeuralNetworkRegressor", "MultitargetGaussianMixtureRegressor", "GaussianMixtureRegressor", 
"MultitargetNeuralNetworkRegressor", "DecisionTreeClassifier", "GeneralImputer", "NeuralNetworkClassifier", "SimpleImputer", "GaussianMixtureClusterer", "KernelPerceptronClassifier", "KMedoidsClusterer"] +MLJEnsembles = ["EnsembleModel"] CatBoost = ["CatBoostRegressor", "CatBoostClassifier"] NearestNeighborModels = ["KNNClassifier", "MultitargetKNNClassifier", "MultitargetKNNRegressor", "KNNRegressor"] MLJScikitLearnInterface = ["ProbabilisticSGDClassifier", "RidgeCVClassifier", "LogisticClassifier", "RandomForestRegressor", "ElasticNetCVRegressor", "PerceptronClassifier", "MultiTaskLassoRegressor", "LinearRegressor", "HDBSCAN", "DBSCAN", "RidgeRegressor", "LassoLarsICRegressor", "ARDRegressor", "SVMNuRegressor", "RidgeClassifier", "SGDRegressor", "ComplementNBClassifier", "HuberRegressor", "SVMNuClassifier", "GradientBoostingClassifier", "GaussianProcessRegressor", "SVMLinearRegressor", "LarsRegressor", "MeanShift", "HistGradientBoostingClassifier", "AdaBoostRegressor", "AffinityPropagation", "MultiTaskLassoCVRegressor", "OrthogonalMatchingPursuitRegressor", "BernoulliNBClassifier", "PassiveAggressiveClassifier", "RidgeCVRegressor", "SVMRegressor", "GaussianNBClassifier", "ExtraTreesClassifier", "KMeans", "MultiTaskElasticNetCVRegressor", "LassoLarsCVRegressor", "OrthogonalMatchingPursuitCVRegressor", "AdaBoostClassifier", "PassiveAggressiveRegressor", "BayesianRidgeRegressor", "GaussianProcessClassifier", "BaggingClassifier", "OPTICS", "RANSACRegressor", "KNeighborsRegressor", "HistGradientBoostingRegressor", "MiniBatchKMeans", "LassoCVRegressor", "DummyRegressor", "BisectingKMeans", "LassoLarsRegressor", "LarsCVRegressor", "KNeighborsClassifier", "SVMLinearClassifier", "FeatureAgglomeration", "DummyClassifier", "BaggingRegressor", "BayesianQDA", "BayesianLDA", "SGDClassifier", "TheilSenRegressor", "SpectralClustering", "Birch", "AgglomerativeClustering", "ElasticNetRegressor", "RandomForestClassifier", "LogisticCVClassifier", "MultiTaskElasticNetRegressor", "ExtraTreesRegressor", "LassoRegressor", "MultinomialNBClassifier", "GradientBoostingRegressor", "SVMClassifier"] @@ -24,13 +25,12 @@ LightGBM = ["LGBMClassifier", "LGBMRegressor"] LaplaceRedux = ["LaplaceClassifier", "LaplaceRegressor"] XGBoost = ["XGBoostCount", "XGBoostRegressor", "XGBoostClassifier"] EvoTrees = ["EvoTreeClassifier", "EvoTreeGaussian", "EvoTreeMLE", "EvoTreeRegressor", "EvoTreeCount"] -SymbolicRegression = ["SRTestRegressor", "MultitargetSRTestRegressor", "MultitargetSRRegressor", "SRRegressor"] MLJModels = ["ConstantClassifier", "Standardizer", "DeterministicConstantClassifier", "UnivariateTimeTypeToContinuous", "OneHotEncoder", "ContinuousEncoder", "UnivariateBoxCoxTransformer", "InteractionTransformer", "ConstantRegressor", "UnivariateDiscretizer", "BinaryThresholdPredictor", "FillImputer", "DeterministicConstantRegressor", "UnivariateStandardizer", "UnivariateFillImputer"] OneRule = ["OneRuleClassifier"] OutlierDetectionPython = ["MCDDetector", "COPODDetector", "HBOSDetector", "IForestDetector", "SOSDetector", "ABODDetector", "LOFDetector", "PCADetector", "INNEDetector", "OCSVMDetector", "ECODDetector", "SODDetector", "LODADetector", "KDEDetector", "CDDetector", "KNNDetector", "GMMDetector", "COFDetector", "CBLOFDetector", "LOCIDetector", "LMDDDetector", "RODDetector"] SelfOrganizingMaps = ["SelfOrganizingMap"] LIBSVM = ["SVC", "EpsilonSVR", "LinearSVC", "ProbabilisticSVC", "NuSVR", "NuSVC", "ProbabilisticNuSVC", "OneClassSVM"] TSVD = ["TSVDTransformer"] +MLJTransforms = ["TargetEncoder", "MissingnessEncoder", 
"ContrastEncoder", "FrequencyEncoder", "CardinalityReducer", "OrdinalEncoder"] GLM = ["LinearBinaryClassifier", "LinearCountRegressor", "LinearRegressor"] MLJFlux = ["EntityEmbedder", "MultitargetNeuralNetworkRegressor", "NeuralNetworkClassifier", "ImageClassifier", "NeuralNetworkBinaryClassifier", "NeuralNetworkRegressor"] -MLJEnsembles = ["EnsembleModel"] diff --git a/src/registry/Project.toml b/src/registry/Project.toml index 61c2aca..caccfe5 100644 --- a/src/registry/Project.toml +++ b/src/registry/Project.toml @@ -25,6 +25,7 @@ MLJScikitLearnInterface = "5ae90465-5518-4432-b9d2-8a1def2f0cab" MLJTSVDInterface = "7fa162e1-0e29-41ca-a6fa-c000ca4e7e7e" MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd" MLJText = "5e27fcf9-6bac-46ba-8580-b5712f3d6387" +MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" Maxnet = "81f79f80-22f2-4e41-ab86-00c11cf0f26f" @@ -37,7 +38,6 @@ PartitionedLS = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" SIRUS = "cdeec39e-fb35-4959-aadb-a1dd5dede958" SelfOrganizingMaps = "ba4b7379-301a-4be0-bee6-171e4e152787" -SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From 89966921da0d2adbef6087b959893c00016496bd Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Sun, 22 Jun 2025 17:03:44 +1200 Subject: [PATCH 02/22] reinstate SymbolicRegression model --- src/registry/Metadata.toml | 144 +++++++++++++++++++++++++++++++++++++ src/registry/Models.toml | 1 + src/registry/Project.toml | 1 + 3 files changed, 146 insertions(+) diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index bb526b3..1b8dc09 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -6911,6 +6911,150 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" +[SymbolicRegression.SRTestRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "SymbolicRegression" +":package_license" = "Apache-2.0" +":load_path" = "SymbolicRegression.MLJInterfaceModule.SRTestRegressor" +":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" +":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`true`" +":supports_class_weights" = "`false`" 
+":supports_online" = "`false`" +":docstring" = """```\nSRTestRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSRTestRegressor = @load SRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRTestRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `operator_enum_constructor = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `loss_scale = log`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}\n alpha: Float64 1.0\n scaled: Bool false\n, LineSearches.BackTracking{Float64, Int64}\n c_1: Float64 0.0001\n ρ_hi: Float64 0.5\n ρ_lo: Float64 0.1\n iterations: Int64 1000\n order: Int64 3\n maxstep: Float64 Inf\n cache: Nothing nothing\n, nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- `optimizer_options = nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.TTY(RawFD(11) paused, 0 bytes waiting)`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- 
`output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}`\n\n""" +":name" = "SRTestRegressor" +":human_name" = "Symbolic Regression via Evolutionary Search" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [] +":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", 
\"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[SymbolicRegression.MultitargetSRTestRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, 
Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "SymbolicRegression" +":package_license" = "Apache-2.0" +":load_path" = "SymbolicRegression.MLJInterfaceModule.MultitargetSRTestRegressor" +":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" +":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nMultitargetSRTestRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultitargetSRTestRegressor = @load MultitargetSRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRTestRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`MultitargetSRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `operator_enum_constructor = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `loss_scale = log`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}\n alpha: Float64 1.0\n scaled: Bool false\n, LineSearches.BackTracking{Float64, Int64}\n c_1: Float64 0.0001\n ρ_hi: Float64 0.5\n ρ_lo: Float64 
0.1\n iterations: Int64 1000\n order: Int64 3\n maxstep: Float64 Inf\n cache: Nothing nothing\n, nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- `optimizer_options = nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.TTY(RawFD(11) paused, 0 bytes waiting)`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}`\n\n""" +":name" = "MultitargetSRTestRegressor" +":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [] +":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, 
:procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[SymbolicRegression.MultitargetSRRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "SymbolicRegression" +":package_license" = "Apache-2.0" +":load_path" = "SymbolicRegression.MLJInterfaceModule.MultitargetSRRegressor" +":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" +":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nMultitargetSRRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetSRRegressor(defaults=...)`.\n\nMulti-target Symbolic Regression regressor (`MultitargetSRRegressor`) conducts several searches for expressions that predict each target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. 
This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any table of target variables whose element scitype is `Continuous`; check the scitype with `schema(y)`. Units in columns of `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. The same weights are used for all targets.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of lists of trained models. The model chosen from each of these lists is determined by the `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `operator_enum_constructor`: Constructor function to use for creating the operators enum. By default, `OperatorEnum` is used, but you can provide a different constructor like `GenericOperatorEnum`. The constructor must accept the keyword arguments `binary_operators` and `unary_operators`.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. 
You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `loss_scale`: Determines how loss values are scaled when computing scores. Options are:\n\n * `:log` (default): Uses logarithmic scaling of loss ratios. This mode requires non-negative loss values and is ideal for traditional loss functions that are always positive.\n * `:linear`: Uses direct differences between losses. This mode handles any loss values (including negative) and is useful for custom loss functions, especially those based on likelihoods.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. 
By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. 
Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. 
This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtraction and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to Julia is used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and you are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. 
Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select the expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity. `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Vector{Int}}`: The complexity of each expression in each Pareto frontier.\n * `losses::Vector{Vector{L}}`: The loss of each expression in each Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{Vector{L}}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. 
A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\nX = (a=rand(100), b=rand(100), c=rand(100))\nY = (y1=(@. cos(X.c) * 2.1 - 0.9), y2=(@. X.a * X.b + X.c))\nmodel = MultitargetSRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, Y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equations used:\nr = report(mach)\nfor (output_index, (eq, i)) in enumerate(zip(r.equation_strings, r.best_idx))\n println(\"Equation used for \", output_index, \": \", eq[i])\nend\n```\n\nSee also [`SRRegressor`](@ref).\n""" +":name" = "MultitargetSRRegressor" +":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [] +":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, 
SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[SymbolicRegression.SRRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, 
AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "SymbolicRegression" +":package_license" = "Apache-2.0" +":load_path" = "SymbolicRegression.MLJInterfaceModule.SRRegressor" +":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" +":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(defaults=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. 
However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `operator_enum_constructor`: Constructor function to use for creating the operators enum. By default, `OperatorEnum` is used, but you can provide a different constructor like `GenericOperatorEnum`. The constructor must accept the keyword arguments `binary_operators` and `unary_operators`.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. 
For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `loss_scale`: Determines how loss values are scaled when computing scores. Options are:\n\n * `:log` (default): Uses logarithmic scaling of loss ratios. This mode requires non-negative loss values and is ideal for traditional loss functions that are always positive.\n * `:linear`: Uses direct differences between losses. This mode handles any loss values (including negative) and is useful for custom loss functions, especially those based on likelihoods.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. 
Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. 
Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. 
However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia is used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select the expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. 
By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. 
A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used: \", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used: \", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n""" +":name" = "SRRegressor" +":human_name" = "Symbolic Regression via Evolutionary Search" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [] +":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", 
\"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + [MLJModels.ConstantClassifier] ":input_scitype" = "`ScientificTypesBase.Table`" ":output_scitype" = "`ScientificTypesBase.Unknown`" diff --git a/src/registry/Models.toml b/src/registry/Models.toml index af85918..d139334 100644 --- a/src/registry/Models.toml +++ b/src/registry/Models.toml @@ -25,6 +25,7 @@ LightGBM = ["LGBMClassifier", "LGBMRegressor"] LaplaceRedux = 
["LaplaceClassifier", "LaplaceRegressor"] XGBoost = ["XGBoostCount", "XGBoostRegressor", "XGBoostClassifier"] EvoTrees = ["EvoTreeClassifier", "EvoTreeGaussian", "EvoTreeMLE", "EvoTreeRegressor", "EvoTreeCount"] +SymbolicRegression = ["SRTestRegressor", "MultitargetSRTestRegressor", "MultitargetSRRegressor", "SRRegressor"] MLJModels = ["ConstantClassifier", "Standardizer", "DeterministicConstantClassifier", "UnivariateTimeTypeToContinuous", "OneHotEncoder", "ContinuousEncoder", "UnivariateBoxCoxTransformer", "InteractionTransformer", "ConstantRegressor", "UnivariateDiscretizer", "BinaryThresholdPredictor", "FillImputer", "DeterministicConstantRegressor", "UnivariateStandardizer", "UnivariateFillImputer"] OneRule = ["OneRuleClassifier"] OutlierDetectionPython = ["MCDDetector", "COPODDetector", "HBOSDetector", "IForestDetector", "SOSDetector", "ABODDetector", "LOFDetector", "PCADetector", "INNEDetector", "OCSVMDetector", "ECODDetector", "SODDetector", "LODADetector", "KDEDetector", "CDDetector", "KNNDetector", "GMMDetector", "COFDetector", "CBLOFDetector", "LOCIDetector", "LMDDDetector", "RODDetector"] diff --git a/src/registry/Project.toml b/src/registry/Project.toml index caccfe5..df82b28 100644 --- a/src/registry/Project.toml +++ b/src/registry/Project.toml @@ -38,6 +38,7 @@ PartitionedLS = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" SIRUS = "cdeec39e-fb35-4959-aadb-a1dd5dede958" SelfOrganizingMaps = "ba4b7379-301a-4be0-bee6-171e4e152787" +SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From 38cc0c914725d1c8db10ea41e7849a37526da5b1 Mon Sep 17 00:00:00 2001 From: "Anthony Blaom, PhD" Date: Sat, 28 Jun 2025 11:55:15 +1200 Subject: [PATCH 03/22] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0f90ecd..a8f4ebe 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# MLJModels +# MLJModels.jl [![Build Status](https://github.com/JuliaAI/MLJModels.jl/workflows/CI/badge.svg)](https://github.com/JuliaAI/MLJModels.jl/actions) [![codecov](https://codecov.io/gh/JuliaAI/MLJModels.jl/graph/badge.svg?token=KgarnnCc0K)](https://codecov.io/gh/JuliaAI/MLJModels.jl) From d76ac5bc6af58112e3a4625ff4223e2cb7421035 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Tue, 22 Jul 2025 16:39:22 +1200 Subject: [PATCH 04/22] trim metadata.jl --- src/metadata.jl | 80 +++++++++++------------------------------------- test/metadata.jl | 45 +++------------------------ 2 files changed, 22 insertions(+), 103 deletions(-) diff --git a/src/metadata.jl b/src/metadata.jl index 99477ca..e17093d 100644 --- a/src/metadata.jl +++ b/src/metadata.jl @@ -1,25 +1,5 @@ -## UTILITIES FOR ENCODING AND DECODING MODEL METADATA -# (for serializing/deserializing into TOML format) - -# fallback encoding: -function encode_dic(s) - prestring = string("`", s, "`") - # hack for objects with gensyms in their string representation: - str = replace(prestring, '#'=>'_') - return str -end - -encode_dic(s::AbstractString) = string(s) -encode_dic(s::Symbol) = string(":", s) -encode_dic(s::Nothing) = "`nothing`" -encode_dic(v::AbstractVector) = encode_dic.(v) -function encode_dic(d::AbstractDict) - ret = LittleDict{}() - for (k, v) in d - ret[encode_dic(k)] = encode_dic(v) - end - return ret -end +# # DECODING MODEL METADATA +# (deserializing TOML dictionary) function decode_dic(s::String) if !isempty(s) @@ -51,34 +31,11 @@ function decode_dic(d::AbstractDict) return ret end -# the inverse of a multivalued dictionary is a multivalued -# dictionary: -function inverse(d::LittleDict{S,Set{T}}) where {S,T} - dinv = LittleDict{T,Set{S}}() - for key in keys(d) - for val in d[key] - if val in keys(dinv) - push!(dinv[val], key) - else - dinv[val] = Set([key,]) - end - end - end - return dinv -end -function inverse(d::Dict{S,Set{T}}) where {S,T} - dinv = Dict{T,Set{S}}() - for key in keys(d) - for val in d[key] - if val in keys(dinv) - push!(dinv[val], key) - else - dinv[val] = Set([key,]) - end - end - end - return dinv -end + +# # MODEL HANDLES + +Handle = NamedTuple{(:name, :pkg), Tuple{String,String}} +(::Type{Handle})(name,string) = NamedTuple{(:name, :pkg)}((name, string)) function Base.isless(h1::Handle, h2::Handle) if isless(h1.name, h2.name) @@ -90,9 +47,16 @@ function Base.isless(h1::Handle, h2::Handle) end end +function (::Type{Handle})(name::String) + if name in AMBIGUOUS_NAMES + return Handle(name, missing) + else + return Handle(name, first(PKGS_GIVEN_NAME[name])) + end +end + -## FUNCTIONS TO BUILD GLOBAL METADATA CONSTANTS IN MLJMODELS -## INITIALIZATION +# # FUNCTIONS TO BUILD GLOBAL METADATA CONSTANTS # to define INFO_GIVEN_HANDLE function info_given_handle(metadata_file) @@ -113,7 +77,7 @@ function info_given_handle(metadata_file) end -# for use in __init__ to define AMBIGUOUS_NAMES +# to define AMBIGUOUS_NAMES function ambiguous_names(info_given_handle) names_with_duplicates = map(keys(info_given_handle) |> collect) do handle handle.name @@ -124,7 +88,7 @@ function ambiguous_names(info_given_handle) end end -# for use in __init__ to define PKGS_GIVEN_NAME +# to define PKGS_GIVEN_NAME function pkgs_given_name(info_given_handle) handles = keys(info_given_handle) |> collect ret = Dict{String,Vector{String}}() @@ -145,14 +109,6 @@ function model_names(info_given_handle) return unique(names_allowing_duplicates) end -function (::Type{Handle})(name::String) - if name in AMBIGUOUS_NAMES - return Handle(name, missing) - else - return Handle(name, first(PKGS_GIVEN_NAME[name])) - end -end - function model_traits_in_registry(info_given_handle) first_entry = info_given_handle[Handle("ConstantRegressor")] return keys(first_entry) |> collect diff --git a/test/metadata.jl b/test/metadata.jl index 186f98d..433bbd6 100644 --- a/test/metadata.jl +++ b/test/metadata.jl @@ -4,47 +4,10 @@ 
using Test using MLJModels using MLJBase -@testset "(de)serialization for TOML" begin - d = Dict() - d[:test] = Tuple{Union{Continuous,Missing},Finite} - d["junk"] = Dict{Any,Any}("H" => Missing, :cross => "lemon", - :t => :w, "r" => "r", - "tuple" =>(nothing, Float64), - "vector" =>[1, 2, Int]) - d["a"] = "b" - d[:f] = true - d["j"] = :post - - @test MLJModels.decode_dic(MLJModels.encode_dic(d)) == d -end - -@testset "inverting set-valued dictionaries" begin - d = Dict( - :x => Set([1, 2]), - :y => Set([2, 3, 5]), - :z => Set([4, 7]), - :a => Set([8, 1]), - :b => Set([4,]), - :w => Set([3, 1, 2]), - :t => Set([0,])) - - dinv = Dict( - 0 => Set([:t,]), - 1 => Set([:x, :a, :w]), - 2 => Set([:x, :y, :w]), - 3 => Set([:y, :w]), - 4 => Set([:z, :b]), - 5 => Set([:y,]), - 7 => Set([:z,]), - 8 => Set([:a,])) - @test MLJModels.inverse(d) == dinv -end - -metadata_file = joinpath(@__DIR__, "..", "src", - "registry", "Metadata.toml") +METADATA = MLJModels.METADATA pca = MLJModels.Handle("PCA", "MultivariateStats") cnst = MLJModels.Handle("ConstantRegressor", "MLJModels") -i = MLJModels.info_given_handle(metadata_file)[pca] +i = MLJModels.info_given_handle(METADATA)[pca] @testset "Handle constructors" begin @test MLJModels.Handle("PCA") == @@ -53,8 +16,8 @@ i = MLJModels.info_given_handle(metadata_file)[pca] end @testset "building INFO_GIVEN_HANDLE" begin - @test MLJModels.info_given_handle(metadata_file)[pca][:name] == "PCA" - d1 = MLJModels.info_given_handle(metadata_file)[cnst][:prediction_type] == + @test MLJModels.info_given_handle(METADATA)[pca][:name] == "PCA" + d1 = MLJModels.info_given_handle(METADATA)[cnst][:prediction_type] == :probabilistic end From 95baee71ce68761b9fb18f4f2f28d7d542234da8 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 22 Jul 2025 17:28:28 +1200 Subject: [PATCH 05/22] cleanup registry paths; mv registry_project to /src/; dump julia <1.10 oops --- Project.toml | 16 +++---- src/MLJModels.jl | 23 +++++----- src/init.jl | 7 ---- src/registry/src/activate_registry_project.jl | 42 ------------------- src/registry_project.jl | 23 ++++++++++ test/registry_project.jl | 19 +++++++++ test/runtests.jl | 12 +----- 7 files changed, 62 insertions(+), 80 deletions(-) delete mode 100644 src/init.jl delete mode 100644 src/registry/src/activate_registry_project.jl create mode 100644 src/registry_project.jl create mode 100644 test/registry_project.jl diff --git a/Project.toml b/Project.toml index 48ef150..6017967 100644 --- a/Project.toml +++ b/Project.toml @@ -31,25 +31,25 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" CategoricalArrays = "0.9, 0.10" CategoricalDistributions = "0.1" Combinatorics = "1.0" -Dates = "<0.0.1, 1" +Dates = "1" Distances = "0.9,0.10" Distributions = "0.25" -InteractiveUtils = "<0.0.1, 1" -LinearAlgebra = "<0.0.1, 1" -Markdown = "<0.0.1, 1" +InteractiveUtils = "1" +LinearAlgebra = "1" +Markdown = "1" MLJModelInterface = "1.10" OrderedCollections = "1.1" Parameters = "0.12" -Pkg = "<0.0.1, 1" +Pkg = "1" PrettyPrinting = "0.3, 0.4" -Random = "<0.0.1, 1" +Random = "1" RelocatableFolders = "0.3, 1" ScientificTypes = "3" StatisticalTraits = "3" -Statistics = "<0.0.1, 1" +Statistics = "1" StatsBase = "0.32,0.33, 0.34" Tables = "0.2,1.0" -julia = "1.6" +julia = "1.10" [extras] MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" diff --git a/src/MLJModels.jl b/src/MLJModels.jl index 88e4b0d..fa4dd79 100755 --- a/src/MLJModels.jl +++ b/src/MLJModels.jl @@ -48,7 +48,6 @@ export UnivariateDiscretizer, OneHotEncoder, ContinuousEncoder, FillImputer, 
UnivariateFillImputer, UnivariateTimeTypeToContinuous, InteractionTransformer -const srcdir = dirname(@__FILE__) # the directory containing this file const MMI = MLJModelInterface if VERSION < v"1.3" @@ -64,28 +63,26 @@ include("builtins/Constant.jl") include("builtins/Transformers.jl") include("builtins/ThresholdPredictors.jl") -Handle = NamedTuple{(:name, :pkg), Tuple{String,String}} -(::Type{Handle})(name,string) = NamedTuple{(:name, :pkg)}((name, string)) +# declare paths to the metadata and associated project file: +const REGISTRY_PROJECT = @path joinpath(@__DIR__, "registry", "Project.toml") +const REGISTRY_METADATA = @path joinpath(@__DIR__, "registry", "Metadata.toml") +Base.include_dependency(REGISTRY_PROJECT) +Base.include_dependency(REGISTRY_METADATA) # load utilities for reading model metadata from file: include("metadata.jl") -# read in the metadata: -metadata_file = joinpath(srcdir, "registry", "Metadata.toml") -Base.include_dependency(metadata_file) -const INFO_GIVEN_HANDLE = info_given_handle(metadata_file) +# read in metadata: +const INFO_GIVEN_HANDLE = info_given_handle(REGISTRY_METADATA) const PKGS_GIVEN_NAME = pkgs_given_name(INFO_GIVEN_HANDLE) const AMBIGUOUS_NAMES = ambiguous_names(INFO_GIVEN_HANDLE) const NAMES = model_names(INFO_GIVEN_HANDLE) const MODEL_TRAITS_IN_REGISTRY = model_traits_in_registry(INFO_GIVEN_HANDLE) -# model search and registry code: +# include tools to search the model registry: include("model_search.jl") -include("loading.jl") -include("registry/src/Registry.jl") -using .Registry -# finalize: -include("init.jl") +# include tools to load model code: +include("loading.jl") end # module diff --git a/src/init.jl b/src/init.jl deleted file mode 100644 index 252a053..0000000 --- a/src/init.jl +++ /dev/null @@ -1,7 +0,0 @@ -const REGISTRY_PATH = @path joinpath(@__DIR__, "registry", "Project.toml") -function __init__() - project = open(REGISTRY_PATH) do io - readlines(io) - end - global REGISTRY_PROJECT = Ref{Vector{String}}(project) -end diff --git a/src/registry/src/activate_registry_project.jl b/src/registry/src/activate_registry_project.jl deleted file mode 100644 index 6abde5d..0000000 --- a/src/registry/src/activate_registry_project.jl +++ /dev/null @@ -1,42 +0,0 @@ -""" - registry_project() - -Experimental, private method. - -Return, as a `Vector{String}`, the lines of the Project.toml used to -generate MLJ Model Registry (aka, model metadata). This Project.toml -file lists as dependencies all packages that provide registered -models. - -""" -registry_project() = MLJModels.REGISTRY_PROJECT[] - -""" - activate_registry_project() - activate_registry_project(path) - -Experimental, private method. - -In the first case, activate a temporary environment using a copy of -the [MLJ Project -Registry](https://github.com/JuliaAI/MLJModels.jl/tree/dev/src/registry) -Project.toml file. This environment will include all packages -providing registered models. - -In the second case, create the environment at the specified `path`. - -To instantiate the environment (for which no Manifest.toml will exist) -run `using Pkg; Pkg.instantiate()`. 
- -""" -function activate_registry_project(projectdir=mktempdir(; cleanup=false)) - filename, stream = mktemp(projectdir) - for line in registry_project() - write(stream, line*"\n") - end - close(stream) - project_filename = joinpath(first(splitdir(filename)), "Project.toml") - cp(filename, project_filename) - Pkg.activate(projectdir) - return nothing -end diff --git a/src/registry_project.jl b/src/registry_project.jl new file mode 100644 index 0000000..4fac816 --- /dev/null +++ b/src/registry_project.jl @@ -0,0 +1,24 @@ +""" + MLJModels.registry_project() + +Experimental, private method. + +Return, as a `Vector{String}`, the lines of the Project.toml associated with the MLJ Model +Registry. This Project.toml file lists as dependencies all packages that provide +registered models. + +Using this method, one can create a clone of the MLJ Model Registry environment and +activate it, as in the example below. This may be useful in MLJ integration tests. + +```julia +mkdir("MyEnv") +open("MyEnv/Project.toml", "w") do file + for line in MLJModels.registry_project() + write(file, line*"\n") + end +end +``` +""" +registry_project() = open(REGISTRY_PROJECT) do io + readlines(io) +end diff --git a/test/registry_project.jl b/test/registry_project.jl new file mode 100644 index 0000000..26abe39 --- /dev/null +++ b/test/registry_project.jl @@ -0,0 +1,19 @@ +using MLJModels +using Pkg, Suppressor + +@testset "registry project can be instantiated" begin + filename, stream = mktemp() + for line in MLJModels.registry_project() + write(stream, line*"\n") + @show line + end + close(stream) + envname = dirname(filename) + mv(filename, joinpath(envname, "Project.toml"); force=true) + Pkg.activate(envname) + # remove `@suppress` if debugging: + @suppress Pkg.instantiate() + + # smoke test: + @test !isempty(keys(Pkg.dependencies())) +end diff --git a/test/runtests.jl b/test/runtests.jl index 6fcede3..1fa0713 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,17 +1,9 @@ import Pkg -if Base.VERSION >= v"1.10-" - # The issue with stdlib versions being fixed to 0.0.0 has been fixed in new versions of Julia -else - # The next line added as a workaround to - # https://github.com/JuliaLang/Pkg.jl/issues/3628 (Julia 1.6): - Pkg.add(name="Statistics", version=VERSION, uuid="10745b16-79ce-11e8-11f9-7d13ad32a3b2") -end - using Test, MLJModels -@testset "registry" begin - @test include(joinpath("..", "src", "registry", "test", "runtests.jl")) +@testset "registry_project" begin + @test include("registry_project.jl") end @testset "metadata" begin From 359c90ce95864af3799b37d7462da3aa68f95e3d Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Tue, 22 Jul 2025 17:33:44 +1200 Subject: [PATCH 06/22] rm test/info_dict.jl as info_dict now lives at MLJModelRegistryTools.jl --- src/registry/test/info_dict.jl | 210 --------------------------------- 1 file changed, 210 deletions(-) delete mode 100644 src/registry/test/info_dict.jl diff --git a/src/registry/test/info_dict.jl b/src/registry/test/info_dict.jl deleted file mode 100644 index 7b6b841..0000000 --- a/src/registry/test/info_dict.jl +++ /dev/null @@ -1,210 +0,0 @@ -module TestInfo - -using MLJModels -using MLJModelInterface -using MLJBase -using Test -using OrderedCollections -using ScientificTypes - -const MMI = MLJModelInterface - -mutable struct DummyProb <: Probabilistic - an_int::Int - a_float::Float64 - a_vector::Vector{Float64} - untyped -end - -MMI.load_path(::Type{DummyProb}) = "GreatPackage.MLJ.DummyProb" -MMI.input_scitype(::Type{DummyProb}) = Table(Finite) -MMI.target_scitype(::Type{DummyProb}) = AbstractVector{<:Continuous} -MMI.is_pure_julia(::Type{DummyProb}) = true -MMI.supports_weights(::Type{DummyProb}) = true -MMI.package_name(::Type{DummyProb}) = "GreatPackage" -MMI.package_uuid(::Type{DummyProb}) = "6f286f6a-111f-5878-ab1e-185364afe411" -MMI.package_url(::Type{DummyProb}) = "https://mickey.mouse.org" -MMI.package_license(::Type{DummyProb}) = "MIT" -MMI.hyperparameter_ranges(::Type{DummyProb}) = - (range(Int, :an_int, values=[1,2]), - range(Float64, :a_float, lower=1, upper=2), - nothing, - nothing) -MMI.predict(::DummyProb, fr, X) = nothing - -""" -dummy determ -""" -mutable struct DummyDeterm <: Deterministic end -MMI.load_path(::Type{DummyDeterm}) = "GreatPackage.MLJ.DummyDeterm" -MMI.input_scitype(::Type{DummyDeterm}) = Table(Finite) -MMI.target_scitype(::Type{DummyDeterm}) = AbstractVector{<:Continuous} -MMI.is_pure_julia(::Type{DummyDeterm}) = true -MMI.supports_weights(::Type{DummyDeterm}) = true -MMI.package_name(::Type{DummyDeterm}) = "GreatPackage" -MMI.package_uuid(::Type{DummyDeterm}) = "6f286f6a-111f-5878-ab1e-185364afe411" -MMI.package_url(::Type{DummyDeterm}) = "https://mickey.mouse.org" -MMI.package_license(::Type{DummyDeterm}) = "MIT" -MMI.predict(::DummyDeterm, fr, X) = nothing - -""" -dummy int -""" -mutable struct DummyInt <: Interval end -MMI.load_path(::Type{DummyInt}) = "GreatPackage.MLJ.DummyInt" -MMI.input_scitype(::Type{DummyInt}) = Table(Finite) -MMI.target_scitype(::Type{DummyInt}) = AbstractVector{<:Continuous} -MMI.is_pure_julia(::Type{DummyInt}) = true -MMI.supports_weights(::Type{DummyInt}) = true -MMI.package_name(::Type{DummyInt}) = "GreatPackage" -MMI.package_uuid(::Type{DummyInt}) = "6f286f6a-111f-5878-ab1e-185364afe411" -MMI.package_url(::Type{DummyInt}) = "https://mickey.mouse.org" -MMI.package_license(::Type{DummyInt}) = "MIT" -MMI.predict(::DummyInt, fr, X) = nothing - -""" -dummy unsup -""" -mutable struct DummyUnsup <: Unsupervised end -MMI.load_path(::Type{DummyUnsup}) = "GreatPackage.MLJ.DummyUnsup" -MMI.input_scitype(::Type{DummyUnsup}) = Table(Finite) -MMI.output_scitype(::Type{DummyUnsup}) = AbstractVector{<:Continuous} -MMI.is_pure_julia(::Type{DummyUnsup}) = true -MMI.supports_weights(::Type{DummyUnsup}) = true -MMI.package_name(::Type{DummyUnsup}) = "GreatPackage" -MMI.package_uuid(::Type{DummyUnsup}) = "6f286f6a-111f-5878-ab1e-185364afe411" -MMI.package_url(::Type{DummyUnsup}) = "https://mickey.mouse.org" -MMI.package_license(::Type{DummyUnsup}) = "MIT" -MMI.transform(::DummyUnsup, fr, X) = nothing - -# helper to check that dictionary `d1` agrees on the key of `d2`, -# which must be subset of 
those of `d1`: -function _issubset(d1, d2) - k1 = keys(d1) - issubset(k1, keys(d2)) || return false - return all(k1) do k - d1[k] == d2[k] - end -end -d1 = Dict('a'=> 1, 'b' => 2) -d2 = Dict('a' => 1, 'b' => 2, 'c' => 3) -@test _issubset(d1, d2) -d2['b'] = 4 -@test !_issubset(d1, d2) - -@testset "info on probabilistic models" begin - d = LittleDict{Symbol,Any}( - :name => "DummyProb", - :load_path => "GreatPackage.MLJ.DummyProb", - :is_pure_julia => true, - :package_uuid => "6f286f6a-111f-5878-ab1e-185364afe411", - :package_name => "GreatPackage", - :package_license => "MIT", - :input_scitype => Table(Finite), - :output_scitype => Unknown, - :supports_weights => true, - :supports_class_weights => false, - :supports_online => false, - :target_scitype => AbstractVector{<:Continuous}, - :prediction_type => :probabilistic, - :package_url => "https://mickey.mouse.org", - :is_supervised => true, - :is_wrapper => false, - :docstring => "", - :implemented_methods => [:predict, ], - :hyperparameters => (:an_int, :a_float, :a_vector, :untyped), - :hyperparameter_ranges => - (range(Int, :an_int, values=[1,2]), - range(Float64, :a_float, lower=1, upper=2), - nothing, - nothing)) - @test _issubset(d, MLJModels.info_dict(DummyProb)) - @test _issubset(d, MLJModels.info_dict(DummyProb(42, 3.14, [1.0, 2.0], :cow))) -end - -@testset "info on deterministic models" begin - d = LittleDict{Symbol,Any}( - :name => "DummyDeterm", - :load_path => "GreatPackage.MLJ.DummyDeterm", - :is_pure_julia => true, - :package_uuid => "6f286f6a-111f-5878-ab1e-185364afe411", - :package_name => "GreatPackage", - :package_license => "MIT", - :input_scitype => Table(Finite), - :output_scitype => Unknown, - :supports_weights => true, - :supports_class_weights => false, - :supports_online => false, - :target_scitype => AbstractVector{<:Continuous}, - :prediction_type => :deterministic, - :package_url => "https://mickey.mouse.org", - :is_supervised => true, - :is_wrapper => false, - :docstring => "dummy determ\n", - :implemented_methods => [:predict, ], - :hyperparameter_types => (), - :hyperparameters => (), - :hyperparameter_ranges => ()) - - @test _issubset(d, MLJModels.info_dict(DummyDeterm)) - @test _issubset(d, MLJModels.info_dict(DummyDeterm())) -end - -@testset "info on interval models" begin - d = LittleDict{Symbol,Any}( - :name => "DummyInt", - :load_path => "GreatPackage.MLJ.DummyInt", - :is_pure_julia => true, - :package_uuid => "6f286f6a-111f-5878-ab1e-185364afe411", - :package_name => "GreatPackage", - :package_license => "MIT", - :input_scitype => Table(Finite), - :output_scitype => Unknown, - :supports_weights => true, - :supports_class_weights => false, - :supports_online => false, - :target_scitype => AbstractVector{<:Continuous}, - :prediction_type => :interval, - :package_url => "https://mickey.mouse.org", - :is_supervised => true, - :is_wrapper => false, - :docstring => "dummy int\n", - :implemented_methods => [:predict, ], - :hyperparameter_types => (), - :hyperparameters => (), - :hyperparameter_ranges => ()) - - @test _issubset(d, MLJModels.info_dict(DummyInt)) - @test _issubset(d, MLJModels.info_dict(DummyInt())) -end - -@testset "info on unsupervised models" begin - d = LittleDict{Symbol,Any}( - :name => "DummyUnsup", - :load_path => "GreatPackage.MLJ.DummyUnsup", - :is_pure_julia => true, - :package_uuid => "6f286f6a-111f-5878-ab1e-185364afe411", - :package_name => "GreatPackage", - :package_license => "MIT", - :input_scitype => Table(Finite), - :target_scitype => Unknown, - :supports_weights => true, - 
:supports_class_weights => false, - :prediction_type => :unknown, - :output_scitype => AbstractVector{<:Continuous}, - :package_url => "https://mickey.mouse.org", - :is_supervised => false, - :supports_online => false, - :is_wrapper => false, - :docstring => "dummy unsup\n", - :implemented_methods => [:transform, ], - :hyperparameter_types => (), - :hyperparameters => (), - :hyperparameter_ranges => ()) - - @test _issubset(d, MLJModels.info_dict(DummyUnsup)) - @test _issubset(d, MLJModels.info_dict(DummyUnsup())) -end - -end -true From e9b14c74e5e8219f20b7e53067202c4c313c297f Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 22 Jul 2025 20:03:27 +1200 Subject: [PATCH 07/22] rm check_registry method and GH action; instead put check into tests --- .github/workflows/check_registry.yml | 44 ------------------------- .github/workflows/ci.yml | 3 ++ test/registry.jl | 48 ++++++++++++++++++++++++++++ test/registry_project.jl | 19 ----------- test/runtests.jl | 14 +++++--- 5 files changed, 61 insertions(+), 67 deletions(-) delete mode 100644 .github/workflows/check_registry.yml create mode 100644 test/registry.jl delete mode 100644 test/registry_project.jl diff --git a/.github/workflows/check_registry.yml b/.github/workflows/check_registry.yml deleted file mode 100644 index 7124192..0000000 --- a/.github/workflows/check_registry.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: Check Registry -on: - pull_request: - branches: - - master -jobs: - check_registry: - if: (${{ github.head_ref }} == "dev") && (${{ github.repository }} == ${{ github.event.pull_request.head.repo.full_name }}) - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - version: - - '1' - os: - - ubuntu-latest - arch: - - x64 - env: - PYTHON: Conda - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v1 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - uses: julia-actions/cache@v1 - env: - cache-name: cache-artifacts - with: - path: ~/.julia/artifacts - key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} - restore-keys: | - ${{ runner.os }}-test-${{ env.cache-name }}- - ${{ runner.os }}-test- - ${{ runner.os }}- - - uses: julia-actions/julia-buildpkg@v1 - - run: julia -e 'using Pkg; Pkg.Registry.update()' - - run: julia -e 'using Pkg; Pkg.develop(Pkg.PackageSpec(path = pwd()))' - - run: julia -e 'using Pkg; Pkg.add("Test")' - - run: julia -e 'using Pkg; Pkg.update()' - - run: julia -e 'using Pkg; Pkg.precompile()' - - run: julia -e 'using Pkg; Pkg.status()' - - run: julia -e 'using MLJModels; using Test; problems = MLJModels.check_registry(); @info("", length(problems), problems); @test(isempty(problems))' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8a4f6e2..3cd76a9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,6 +41,9 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 + env: + # This environment variable enables the integration tests: + MLJ_TEST_INTEGRATION: '1' - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v4 with: diff --git a/test/registry.jl b/test/registry.jl new file mode 100644 index 0000000..188c7ac --- /dev/null +++ b/test/registry.jl @@ -0,0 +1,48 @@ +using MLJModels +using Pkg +using Distributed +using Suppressor + +@testset "loading all models in the MLJ Model Registry" begin + # assemble the @load commands - one for each model in the registry: + model_proxies =filter( models()) do 
proxy
+        !proxy.is_wrapper
+    end
+    load_commands = map(model_proxies) do proxy
+        :(MLJModels.@load $(proxy.name) pkg=$(proxy.package_name))
+    end
+
+    # make a clone of the MLJModel registry, to test `registry_project` method:
+    filename, stream = mktemp()
+    for line in MLJModels.Registry.registry_project()
+        write(stream, line*"\n")
+    end
+    close(stream)
+    registry = dirname(filename) # we need to rename project file to ..../Project.toml
+    mv(filename, joinpath(registry, "Project.toml"); force=true)
+
+    # open a new Julia process in which to activate the registry project and attempt to
+    # load all models:
+    id = only(addprocs(1))
+
+    # define the programs to run in that process:
+    # 1. To instantiate the registry environment:
+    program1 = quote
+        using Pkg
+        Pkg.activate($registry)
+        Pkg.instantiate()
+        using MLJModels
+        !isempty(keys(Pkg.dependencies()))
+    end
+    # 2. To load all the models:
+    program2 = quote
+        $(load_commands...)
+        true
+    end
+    # remove `@suppress` to debug:
+    @test @suppress remotecall_fetch(Main.eval, id, program1)
+    @info "Attempting to load all MLJ Model Registry models into a Julia process. "
+    @info "Be patient, this may take five minutes or so..."
+    @test @suppress remotecall_fetch(Main.eval, id, program2)
+    rmprocs(id)
+end
diff --git a/test/registry_project.jl b/test/registry_project.jl
deleted file mode 100644
index 26abe39..0000000
--- a/test/registry_project.jl
+++ /dev/null
@@ -1,19 +0,0 @@
-using MLJModels
-using Pkg
-
-@testset "registry project can be instantiated" begin
-    filename, stream = mktemp()
-    for line in MLJModels.Registry.registry_project()
-        write(stream, line*"\n")
-        @show line
-    end
-    close(stream)
-    envname = dirname(filename)
-    mv(filename, joinpath(envname, "Project.toml"); force=true)
-    Pkg.activate(envname)
-    # remove `@suppress` if debugging:
-    @suppress Pkg.instantiate()
-
-    # smoke test:
-    @test !isempty(keys(Pkg.dependencies()))
-end
diff --git a/test/runtests.jl b/test/runtests.jl
index 1fa0713..05fea36 100755
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,10 +2,6 @@ import Pkg
 
 using Test, MLJModels
 
-@testset "registry_project" begin
-    @test include("registry_project.jl")
-end
-
 @testset "metadata" begin
     @testset "metadata.jl" begin
         @test include("metadata.jl")
@@ -29,3 +25,13 @@ end
         @test include("builtins/ThresholdPredictors.jl")
     end
 end
+
+if parse(Bool, get(ENV, "MLJ_TEST_REGISTRY", "false"))
+    @testset "registry" begin
+        @test include("registry.jl")
+    end
+else
+    @info "The MLJ Model Registry test is being skipped. Set environment variable "*
+        "MLJ_TEST_REGISTRY = \"true\" to include it.\n"*
+        "The registry test takes at least one hour. "
+end
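For orientation, a minimal sketch of how the gated test might be run locally. The environment variable is the one read in `test/runtests.jl` above; `Pkg.test` is standard Pkg usage, and environment variables are inherited by the test process:

```julia
# Opt in to the (roughly one hour) registry test before running the
# MLJModels test suite. Assumes MLJModels is a dependency of (or dev'd
# into) the active environment.
using Pkg

ENV["MLJ_TEST_REGISTRY"] = "true"
Pkg.test("MLJModels")
```

From 3eac2ccd48043272545a18c7080bfbf5032238c5 Mon Sep 17 00:00:00 2001
From: "Anthony D. 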
Blaom" Date: Tue, 22 Jul 2025 20:06:05 +1200 Subject: [PATCH 08/22] rm long-dead orphaned code --- src/GaussianProcesses.jl | 89 ---------------------------------------- 1 file changed, 89 deletions(-) delete mode 100755 src/GaussianProcesses.jl diff --git a/src/GaussianProcesses.jl b/src/GaussianProcesses.jl deleted file mode 100755 index 7b20f4f..0000000 --- a/src/GaussianProcesses.jl +++ /dev/null @@ -1,89 +0,0 @@ -module GaussianProcesses_ - -export GPClassifier - -import MLJModelInterface -import MLJModelInterface: Table, Continuous, Count, Finite, OrderedFactor, - Multiclass - -const MMI = MLJModelInterface - -using CategoricalArrays - -import ..GaussianProcesses # strange lazy-loading syntax - -const GP = GaussianProcesses - -mutable struct GPClassifier{M<:GP.Mean, K<:GP.Kernel} <: MMI.Deterministic - mean::M - kernel::K -end - -function GPClassifier( - ; mean=GP.MeanZero() - , kernel=GP.SE(0.0,1.0)) # binary - - model = GPClassifier( - mean - , kernel) - - message = MMI.clean!(model) - isempty(message) || @warn message - - return model -end - -# function MMI.clean! not provided - -function MMI.fit(model::GPClassifier{M,K} - , verbosity::Int - , X - , y) where {M,K} - - Xmatrix = MMI.matrix(X) - - y_plain = MMI.int(y) - - a_target_element = y[1] - nclasses = length(MMI.classes(a_target_element)) - decode = MMI.decoder(a_target_element) - - gp = GP.GPE(transpose(Xmatrix) - , y_plain - , model.mean - , model.kernel) - GP.fit!(gp, transpose(Xmatrix), y_plain) - - fitresult = (gp, nclasses, decode) - - cache = nothing - report = nothing - - return fitresult, cache, report -end - -function MMI.predict(model::GPClassifier - , fitresult - , Xnew) - - Xmatrix = MMI.matrix(Xnew) - - gp, nclasses, decode = fitresult - - pred = GP.predict_y(gp, transpose(Xmatrix))[1] # Float - # rounding with clamping between 1 and nlevels - pred_rc = clamp.(round.(Int, pred), 1, nclasses) - - return decode(pred_rc) -end - -# metadata: -MMI.load_path(::Type{<:GPClassifier}) = "MLJModels.GaussianProcesses_.GPClassifier" # lazy-loaded from MLJ -MMI.package_name(::Type{<:GPClassifier}) = "GaussianProcesses" -MMI.package_uuid(::Type{<:GPClassifier}) = "891a1506-143c-57d2-908e-e1f8e92e6de9" -MMI.package_url(::Type{<:GPClassifier}) = "https://github.com/STOR-i/GaussianProcesses.jl" -MMI.is_pure_julia(::Type{<:GPClassifier}) = true -MMI.input_scitype(::Type{<:GPClassifier}) = Table(Continuous) -MMI.target_scitype(::Type{<:GPClassifier}) = AbstractVector{<:Finite} - -end # module From 448645853f9290b0626ce9f24fc9ac3fb67d0f8c Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Tue, 22 Jul 2025 20:23:10 +1200 Subject: [PATCH 09/22] remove remaining registry maintenance tools --- src/registry/Models.toml | 37 ------ src/registry/README.md | 18 +-- src/registry/src/Registry.jl | 41 ------- src/registry/src/check_registry.jl | 51 --------- src/registry/src/constructors.jl | 31 ----- src/registry/src/info_dict.jl | 27 ----- src/registry/src/update.jl | 177 ----------------------------- src/registry/test/runtests.jl | 5 - 8 files changed, 10 insertions(+), 377 deletions(-) delete mode 100644 src/registry/Models.toml delete mode 100644 src/registry/src/Registry.jl delete mode 100644 src/registry/src/check_registry.jl delete mode 100644 src/registry/src/constructors.jl delete mode 100644 src/registry/src/info_dict.jl delete mode 100644 src/registry/src/update.jl delete mode 100644 src/registry/test/runtests.jl diff --git a/src/registry/Models.toml b/src/registry/Models.toml deleted file mode 100644 index d139334..0000000 --- a/src/registry/Models.toml +++ /dev/null @@ -1,37 +0,0 @@ -BetaML = ["RandomForestRegressor", "GaussianMixtureImputer", "RandomForestClassifier", "RandomForestImputer", "PerceptronClassifier", "AutoEncoder", "DecisionTreeRegressor", "PegasosClassifier", "KMeansClusterer", "NeuralNetworkRegressor", "MultitargetGaussianMixtureRegressor", "GaussianMixtureRegressor", "MultitargetNeuralNetworkRegressor", "DecisionTreeClassifier", "GeneralImputer", "NeuralNetworkClassifier", "SimpleImputer", "GaussianMixtureClusterer", "KernelPerceptronClassifier", "KMedoidsClusterer"] -MLJEnsembles = ["EnsembleModel"] -CatBoost = ["CatBoostRegressor", "CatBoostClassifier"] -NearestNeighborModels = ["KNNClassifier", "MultitargetKNNClassifier", "MultitargetKNNRegressor", "KNNRegressor"] -MLJScikitLearnInterface = ["ProbabilisticSGDClassifier", "RidgeCVClassifier", "LogisticClassifier", "RandomForestRegressor", "ElasticNetCVRegressor", "PerceptronClassifier", "MultiTaskLassoRegressor", "LinearRegressor", "HDBSCAN", "DBSCAN", "RidgeRegressor", "LassoLarsICRegressor", "ARDRegressor", "SVMNuRegressor", "RidgeClassifier", "SGDRegressor", "ComplementNBClassifier", "HuberRegressor", "SVMNuClassifier", "GradientBoostingClassifier", "GaussianProcessRegressor", "SVMLinearRegressor", "LarsRegressor", "MeanShift", "HistGradientBoostingClassifier", "AdaBoostRegressor", "AffinityPropagation", "MultiTaskLassoCVRegressor", "OrthogonalMatchingPursuitRegressor", "BernoulliNBClassifier", "PassiveAggressiveClassifier", "RidgeCVRegressor", "SVMRegressor", "GaussianNBClassifier", "ExtraTreesClassifier", "KMeans", "MultiTaskElasticNetCVRegressor", "LassoLarsCVRegressor", "OrthogonalMatchingPursuitCVRegressor", "AdaBoostClassifier", "PassiveAggressiveRegressor", "BayesianRidgeRegressor", "GaussianProcessClassifier", "BaggingClassifier", "OPTICS", "RANSACRegressor", "KNeighborsRegressor", "HistGradientBoostingRegressor", "MiniBatchKMeans", "LassoCVRegressor", "DummyRegressor", "BisectingKMeans", "LassoLarsRegressor", "LarsCVRegressor", "KNeighborsClassifier", "SVMLinearClassifier", "FeatureAgglomeration", "DummyClassifier", "BaggingRegressor", "BayesianQDA", "BayesianLDA", "SGDClassifier", "TheilSenRegressor", "SpectralClustering", "Birch", "AgglomerativeClustering", "ElasticNetRegressor", "RandomForestClassifier", "LogisticCVClassifier", "MultiTaskElasticNetRegressor", "ExtraTreesRegressor", "LassoRegressor", "MultinomialNBClassifier", "GradientBoostingRegressor", "SVMClassifier"] -OutlierDetectionNeighbors = ["ABODDetector", "DNNDetector", "LOFDetector", "KNNDetector", 
"COFDetector"] -SIRUS = ["StableRulesClassifier", "StableForestClassifier", "StableRulesRegressor", "StableForestRegressor"] -MLJIteration = ["IteratedModel"] -PartitionedLS = ["PartLS"] -MLJLinearModels = ["QuantileRegressor", "LogisticClassifier", "MultinomialClassifier", "LADRegressor", "RidgeRegressor", "RobustRegressor", "ElasticNetRegressor", "LinearRegressor", "LassoRegressor", "HuberRegressor"] -Maxnet = ["MaxnetBinaryClassifier"] -ParallelKMeans = ["KMeans"] -NaiveBayes = ["GaussianNBClassifier", "MultinomialNBClassifier"] -MLJBase = ["Pipeline", "Resampler", "Stack", "TransformedTargetModel"] -MultivariateStats = ["LDA", "MultitargetLinearRegressor", "BayesianSubspaceLDA", "FactorAnalysis", "LinearRegressor", "ICA", "PPCA", "RidgeRegressor", "KernelPCA", "MultitargetRidgeRegressor", "SubspaceLDA", "BayesianLDA", "PCA"] -DecisionTree = ["AdaBoostStumpClassifier", "DecisionTreeRegressor", "DecisionTreeClassifier", "RandomForestRegressor", "RandomForestClassifier"] -MLJBalancing = ["BalancedBaggingClassifier", "BalancedModel"] -Imbalance = ["RandomOversampler", "SMOTENC", "TomekUndersampler", "ClusterUndersampler", "SMOTE", "SMOTEN", "ROSE", "RandomUndersampler", "ENNUndersampler", "BorderlineSMOTE1", "RandomWalkOversampler"] -MLJTuning = ["TunedModel"] -FeatureSelection = ["FeatureSelector", "RecursiveFeatureElimination"] -Clustering = ["HierarchicalClustering", "DBSCAN", "KMeans", "AffinityPropagation", "KMedoids"] -EvoLinear = ["EvoSplineRegressor", "EvoLinearRegressor"] -MLJText = ["TfidfTransformer", "CountTransformer", "BM25Transformer"] -LightGBM = ["LGBMClassifier", "LGBMRegressor"] -LaplaceRedux = ["LaplaceClassifier", "LaplaceRegressor"] -XGBoost = ["XGBoostCount", "XGBoostRegressor", "XGBoostClassifier"] -EvoTrees = ["EvoTreeClassifier", "EvoTreeGaussian", "EvoTreeMLE", "EvoTreeRegressor", "EvoTreeCount"] -SymbolicRegression = ["SRTestRegressor", "MultitargetSRTestRegressor", "MultitargetSRRegressor", "SRRegressor"] -MLJModels = ["ConstantClassifier", "Standardizer", "DeterministicConstantClassifier", "UnivariateTimeTypeToContinuous", "OneHotEncoder", "ContinuousEncoder", "UnivariateBoxCoxTransformer", "InteractionTransformer", "ConstantRegressor", "UnivariateDiscretizer", "BinaryThresholdPredictor", "FillImputer", "DeterministicConstantRegressor", "UnivariateStandardizer", "UnivariateFillImputer"] -OneRule = ["OneRuleClassifier"] -OutlierDetectionPython = ["MCDDetector", "COPODDetector", "HBOSDetector", "IForestDetector", "SOSDetector", "ABODDetector", "LOFDetector", "PCADetector", "INNEDetector", "OCSVMDetector", "ECODDetector", "SODDetector", "LODADetector", "KDEDetector", "CDDetector", "KNNDetector", "GMMDetector", "COFDetector", "CBLOFDetector", "LOCIDetector", "LMDDDetector", "RODDetector"] -SelfOrganizingMaps = ["SelfOrganizingMap"] -LIBSVM = ["SVC", "EpsilonSVR", "LinearSVC", "ProbabilisticSVC", "NuSVR", "NuSVC", "ProbabilisticNuSVC", "OneClassSVM"] -TSVD = ["TSVDTransformer"] -MLJTransforms = ["TargetEncoder", "MissingnessEncoder", "ContrastEncoder", "FrequencyEncoder", "CardinalityReducer", "OrdinalEncoder"] -GLM = ["LinearBinaryClassifier", "LinearCountRegressor", "LinearRegressor"] -MLJFlux = ["EntityEmbedder", "MultitargetNeuralNetworkRegressor", "NeuralNetworkClassifier", "ImageClassifier", "NeuralNetworkBinaryClassifier", "NeuralNetworkRegressor"] diff --git a/src/registry/README.md b/src/registry/README.md index 594fd93..15da57f 100644 --- a/src/registry/README.md +++ b/src/registry/README.md @@ -1,12 +1,14 @@ The *MLJ Model Registry*, also known as the 
*model metadata database*, -is the current directory called file -[METADATA.toml](METADATA.toml). It is generated by loading all -packages in [Project.toml](Project.toml), searching for all types -implementing MLJ's model interface, and extracting [model -trait](https://github.com/JuliaAI/MLJModelInterface.jl/blob/dev/src/MLJModelInterface.jl) -values for all such types. The abbreviated list of models in -[Models.toml](Models.toml) is generated at the same time. +consists of the files in this directory: +- [Project.toml](Project.toml): Project file for a Julia package environment whose + dependencies are all packages providing models with metadata searchable by the MLJ user + after running `using MLJ` (or just `using MLJModels`). -[Instructions](https://github.com/JuliaAI/MLJModels.jl#instructions-for-updating-the-mlj-model-registry) on registering models implementing the MLJ interface. +- [Metadata.toml](Metadata.toml): The detailed model metadata, keyed on package name. + +MLJ developers should use +[MLJModelRegistryTools.jl](https://github.com/JuliaAI/MLJModelRegistryTools.jl) to make +updates and corrections, following [the +instructions](https://juliaai.github.io/MLJModelRegistryTools.jl/stable/registry_management_tools/#Registry-management-tools) there. diff --git a/src/registry/src/Registry.jl b/src/registry/src/Registry.jl deleted file mode 100644 index 054443f..0000000 --- a/src/registry/src/Registry.jl +++ /dev/null @@ -1,41 +0,0 @@ -module Registry - -using Pkg -import Pkg.TOML -using MLJModels -import MLJModelInterface -import MLJModelInterface.Model -for T in MLJModelInterface.MODEL_TRAITS - @eval(import MLJModelInterface.$T) -end - -using OrderedCollections -using InteractiveUtils - -# TODO: is this import really needed?? -# for testing decoding of metadata: -import ScientificTypes: Found, Continuous, Finite, Infinite -import ScientificTypes: OrderedFactor, Count, Multiclass, Binary - -export @update, check_registry, activate_registry_project, info_dict - -const srcdir = dirname(@__FILE__) # the directory containing this file -const environment_path = joinpath(srcdir, "..") - -# has tool to generate dictionary of model types keyed on constructor -include("constructors.jl") - -# for extracting model traits from a loaded model type -include("info_dict.jl") - -# for generating and serializing the complete model metadata database -include("update.jl") - -# for checking `@load` works for all models in the database -include("check_registry.jl") - -# for activating a clone of the registry environment: -include("activate_registry_project.jl") - - -end # module diff --git a/src/registry/src/check_registry.jl b/src/registry/src/check_registry.jl deleted file mode 100644 index 1f29429..0000000 --- a/src/registry/src/check_registry.jl +++ /dev/null @@ -1,51 +0,0 @@ -""" - MLJModels.check_registry(; mod=Main, verbosity=1) - -Check that every model in the [MLJ aodel -Registry](https://github.com/JuliaAI/MLJModels.jl/tree/dev/src/registry) -has a working `load_path` trait by using it to import the model -type. Here `mod` should be the module from which the method is called -- `Main` by default, but `mod=@__MODULE__` should work in general. - -Returns a row table detailing the failures, which is empty in the case -of no failures. 
- -""" -function check_registry(; mod=Main, verbosity=1) - - basedir = Registry.environment_path - mljmodelsdir = joinpath(basedir, "..", "..", ".") - Pkg.activate(basedir) - Pkg.develop(PackageSpec(path=mljmodelsdir)) - Pkg.instantiate() - - models = MLJModels.models() - pkgs = MLJModels.Registry.PACKAGES - - # import packages - verbosity < 1 || @info "Loading model interface packages." - program = quote end - for pkg in pkgs - line = :(import $pkg) - push!(program.args, line) - end - mod.eval(program) - - verbosity < 1 || @info "Checking model load paths." - quote - modeltypes = MLJModels.Registry.finaltypes(MLJModels.Model) - filter!(modeltypes) do T - !isabstracttype(T) && !MLJModels.MLJModelInterface.is_wrapper(T) - end - using MLJTestInterface - fails, _ = MLJTestInterface.test( - modeltypes; - level=1, - mod=$mod, - throw=false, - verbosity=$verbosity - ) - fails - end |> mod.eval - -end diff --git a/src/registry/src/constructors.jl b/src/registry/src/constructors.jl deleted file mode 100644 index 6b06d99..0000000 --- a/src/registry/src/constructors.jl +++ /dev/null @@ -1,31 +0,0 @@ -""" - model_type_given_constructor() - -**Private method.** - -Return a dictionary of all subtypes of MLJ.Model, keyed on constructor. Where multiple -types share a single constructor, there can only be one value, and which value appears is -ambiguous. - -Typically a model type and it's constructor have the same name, but for wrappers, such as -`TunedModel`, several types share the same constructor (e.g., `DeterministicTunedModel`, -`ProbabilisticTunedModel`). - -""" -function model_type_given_constructor() - - # Note that wrappers are required to overload `MLJModelInterface.constructor` and the - # fallback is `nothing`. - modeltypes = - MLJModels.Registry.finaltypes(MLJModels.Model) - filter!(modeltypes) do T - !isabstracttype(T) - end - - return Dict( - map(modeltypes) do M - C = MLJModelInterface.constructor(M) - Pair(isnothing(C) ? M : C, M) - end..., - ) -end diff --git a/src/registry/src/info_dict.jl b/src/registry/src/info_dict.jl deleted file mode 100644 index f57168c..0000000 --- a/src/registry/src/info_dict.jl +++ /dev/null @@ -1,27 +0,0 @@ -# `info_dict` returns a dictionary of model traits which, after -# encoding, can be serialized to TOML file to create the "model -# registry". Not intended to be exposed to user. Note that `info` gets -# the list of traits from the registry but `info_dict` gets the list -# from MLJModelInterface.MODEL_TRAITS, which is larger when new traits are -# added but the registry is not yet updated. - -info_dict(model::Model) = info_dict(typeof(model)) - -ismissing_or_isa(x, T) = ismissing(x) || x isa T - -function info_dict(M::Type{<:Model}) - message = "$M has a bad trait declaration.\n" - ismissing_or_isa(is_pure_julia(M), Bool) || - error(message * "`is_pure_julia($M)` must return true or false") - ismissing_or_isa(supports_weights(M), Bool) || - error(message * "`supports_weights($M)` must return true, "* - "false or missing. ") - ismissing_or_isa(supports_class_weights(M), Bool) || - error(message * "`supports_class_weights($M)` must return true, "* - "false or missing. ") - is_wrapper(M) isa Bool || - error(message * "`is_wrapper($M)` must return true, false. 
") - - return LittleDict{Symbol,Any}(trait => eval(:($trait))(M) - for trait in MLJModelInterface.MODEL_TRAITS) -end diff --git a/src/registry/src/update.jl b/src/registry/src/update.jl deleted file mode 100644 index 7f340b3..0000000 --- a/src/registry/src/update.jl +++ /dev/null @@ -1,177 +0,0 @@ -## METHODS TO GENERATE METADATA AND WRITE TO ARCHIVE - -function finaltypes(T::Type) - s = InteractiveUtils.subtypes(T) - if isempty(s) - return [T, ] - else - return reduce(vcat, [finaltypes(S) for S in s]) - end -end - -const project_toml = joinpath(srcdir, "../Project.toml") -const PACKAGES = map( - Symbol, - keys(TOML.parsefile(project_toml)["deps"]) |> collect, -) -filter!(PACKAGES) do pkg - !(pkg in (:InteractiveUtils, :Pkg, :MLJModelInterface, :MLJTestIntegration)) -end - -const package_import_commands = [:(import $pkg) for pkg in PACKAGES] - -macro update() - mod = __module__ - _update(mod, false) -end - -""" - MLJModels.@update - -Update the [MLJ Model -Registry](https://github.com/JuliaAI/MLJModels.jl/tree/dev/src/registry) -by loading all packages in the registry Project.toml file and -searching for types implementing the MLJ model interface. - -*For MLJ administrators only.* - -To register all the models in GreatNewPackage with MLJ: - -- In the dev branch of a clone of the dev branch of MLJModels, change - to the `/src/registry/` directory and, in the latest version of - julia, activate the environment specified by the Project.toml there, - after checking the [compat] conditions there are up to date. It is - suggested you do not use `Revise`. - -- Add `GreatNewPackage` to the environment. - -- In some environment to which your MLJModels clone has been added - (using `Pkg.dev`) execute `using MLJModels; MLJModels.@update`. This updates - `src/registry/Metadata.toml` and `src/registry/Models.toml` (the - latter is generated for convenience and not used by MLJ). - -- Quit your REPL session and make a trivial commit to your MLJModels - branch to force pre-compilation in a new julia session when you run - `using MLJModels`. (For technical reasons the registry is not loaded - in `__init__()`, so without pre-compiliation the new ]registry is not - available.) - -- Test that the interfaces load properly with - `MLJModels.check_registry()`. (CI will fail on dev -> master if - this test fails.) - -- Push your changes to an appropriate branch of MLJModels to make - the updated metadata available to users of the next MLJModels tagged - release. - -Once a new MLJModels version is released, you must make the following updates at MLJ.jl: - -- Ensure `GreatNewPackage` is in the [extras] and [target] sections of the Project.toml - for MLJ.jl (for inclusion in integration tests) - -- Add an entry for the new model(s) in MLJ/docs/ModelDescriptors.toml (for inclusion in - the MLJ Model Browser) - -These last two actions do not require tagging a new MLJ.jl release. - -""" -macro update(ex) - mod = __module__ - test_env_only = eval(ex) - test_env_only isa Bool || "b in @update(b) must be Bool. " - _update(mod, test_env_only) -end - -function _update(mod, test_env_only) - - test_env_only && @info "Testing registry environment only. " - - program1 = quote - @info "Packages to be searched for model implementations:" - for pkg in $PACKAGES - println(pkg) - end - using Pkg - Pkg.activate($environment_path) - @info "resolving Model Registry environment..." - Pkg.resolve() - end - - program2 = quote - warnings = "" - - @info "Instantiating Model Registry environment..." 
-        Pkg.instantiate()
-
-        @info "Loading packages from the Model Registry..."
-        import MLJModels
-        using Pkg.TOML
-
-        # import the packages
-        $(Registry.package_import_commands...)
-
-        @info "Generating model metadata..."
-
-        model_type_given_constructor = MLJModels.Registry.model_type_given_constructor()
-        constructors = keys(model_type_given_constructor) |> collect
-        sort!(constructors, by=string)
-
-        # generate and write to file the model metadata:
-        api_packages = string.(MLJModels.Registry.PACKAGES)
-        meta_given_package = Dict()
-
-        for C in constructors
-            M = model_type_given_constructor[C]
-            _info = MLJModels.info_dict(M)
-            constructor_name = split(string(C), '.') |> last
-            _info[:name] = constructor_name
-            pkg = _info[:package_name]
-            path = _info[:load_path]
-            api_pkg = split(path, '.') |> first
-            pkg in ["unknown",] && begin
-                global warnings *= "$M `package_name` or `load_path` is \"unknown\")\n"
-            end
-            api_pkg in api_packages || begin
-                global warnings *= "Bad `load_path` trait for $M: "*
-                    "`$api_pkg` not a registered package.\n"
-            end
-
-            haskey(meta_given_package, pkg) ||
-                (meta_given_package[pkg] = Dict())
-            haskey(meta_given_package, constructor_name) &&
-                error("Encountered multiple model names for "*
-                    "`package_name=$pkg`")
-            meta_given_package[pkg][constructor_name] = _info
-            println(C, "\u2714 ")
-        end
-        print("\r")
-
-        open(joinpath(MLJModels.Registry.srcdir, "../Metadata.toml"), "w") do file
-            TOML.print(file, MLJModels.encode_dic(meta_given_package))
-        end
-
-        # generate and write to file list of models for each package:
-        models_given_pkg = Dict()
-        for pkg in keys(meta_given_package)
-            models_given_pkg[pkg] = collect(keys(meta_given_package[pkg]))
-        end
-        open(joinpath(MLJModels.Registry.srcdir, "../Models.toml"), "w") do file
-            TOML.print(file, models_given_pkg)
-        end
-
-        isempty(warnings) || @warn warnings
-
-        :(println("Local Metadata.toml updated."))
-
-    end
-
-    mod.eval(program1)
-    test_env_only || mod.eval(program2)
-
-    println("\n You can check the registry by running "*
-            "`MLJModels.check_registry()` but may need to force "*
-            "recompilation of MLJModels.\n\n"*
-            "You can safely ignore \"conflicting import\" warnings. ")
-
-    true
-end
diff --git a/src/registry/test/runtests.jl b/src/registry/test/runtests.jl
deleted file mode 100644
index d484e25..0000000
--- a/src/registry/test/runtests.jl
+++ /dev/null
@@ -1,5 +0,0 @@
-@testset "metadata.jl" begin
-    @test include("info_dict.jl")
-end
-
-true
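The rewritten README above stresses that, with the maintenance tools gone, the registry metadata remains searchable by ordinary users of MLJModels. A minimal sketch of such a query, assuming only that MLJModels is installed (`models` and `info` are existing exports, not part of this patch):

```julia
# Query the shipped registry metadata; none of this depends on the
# removed maintenance tools.
using MLJModels

models("RandomForest")                # model proxies matching the string
info("PCA", pkg="MultivariateStats")  # full metadata entry for one model
```

From 0f0d4d42733c399d1df41351ac0b034bf7bb3b4f Mon Sep 17 00:00:00 2001
From: "Anthony D. 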
Blaom" Date: Tue, 22 Jul 2025 20:27:03 +1200 Subject: [PATCH 10/22] add forgotten test dependency --- Project.toml | 6 ++++-- src/registry/README.md | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 6017967..13e2750 100644 --- a/Project.toml +++ b/Project.toml @@ -36,8 +36,8 @@ Distances = "0.9,0.10" Distributions = "0.25" InteractiveUtils = "1" LinearAlgebra = "1" -Markdown = "1" MLJModelInterface = "1.10" +Markdown = "1" OrderedCollections = "1.1" Parameters = "0.12" Pkg = "1" @@ -48,6 +48,7 @@ ScientificTypes = "3" StatisticalTraits = "3" Statistics = "1" StatsBase = "0.32,0.33, 0.34" +Suppressor = "0.2.8" Tables = "0.2,1.0" julia = "1.10" @@ -57,7 +58,8 @@ MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661" MLJMultivariateStatsInterface = "1b6a4a23-ba22-4f51-9698-8599985d3728" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" +Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["MLJBase", "MLJDecisionTreeInterface", "MLJMultivariateStatsInterface", "Pkg", "StableRNGs", "Test"] +test = ["MLJBase", "MLJDecisionTreeInterface", "MLJMultivariateStatsInterface", "Pkg", "StableRNGs", "Suppressor", "Test"] diff --git a/src/registry/README.md b/src/registry/README.md index 15da57f..b664e9b 100644 --- a/src/registry/README.md +++ b/src/registry/README.md @@ -11,4 +11,3 @@ MLJ developers should use [MLJModelRegistryTools.jl](https://github.com/JuliaAI/MLJModelRegistryTools.jl) to make updates and corrections, following [the instructions](https://juliaai.github.io/MLJModelRegistryTools.jl/stable/registry_management_tools/#Registry-management-tools) there. - From 16d8e201b29773feced1c34e291cc64696a8d6eb Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Tue, 22 Jul 2025 21:05:57 +1200 Subject: [PATCH 11/22] rm some obsolete testing --- src/MLJModels.jl | 3 +++ src/registry/README.md | 2 ++ test/builtins/Constant.jl | 31 +++++++++---------------------- test/builtins/Transformers.jl | 20 -------------------- test/metadata.jl | 2 +- test/registry.jl | 2 +- 6 files changed, 16 insertions(+), 44 deletions(-) diff --git a/src/MLJModels.jl b/src/MLJModels.jl index fa4dd79..ffaae0c 100755 --- a/src/MLJModels.jl +++ b/src/MLJModels.jl @@ -85,4 +85,7 @@ include("model_search.jl") # include tools to load model code: include("loading.jl") +# include tool for cloning the Model Registry project file: +include("registry_project.jl") + end # module diff --git a/src/registry/README.md b/src/registry/README.md index b664e9b..fdac920 100644 --- a/src/registry/README.md +++ b/src/registry/README.md @@ -1,3 +1,5 @@ +# The MLJ Model Registry + The *MLJ Model Registry*, also known as the *model metadata database*, consists of the files in this directory: diff --git a/test/builtins/Constant.jl b/test/builtins/Constant.jl index 4711ae8..d32f306 100644 --- a/test/builtins/Constant.jl +++ b/test/builtins/Constant.jl @@ -19,17 +19,10 @@ X = NamedTuple{(:x1,:x2,:x3)}((rand(10), rand(10), rand(10))) @test MLJBase.predict(model, fitresult, X)[7].μ ≈ d.μ @test MLJBase.predict_mean(model, fitresult, X) ≈ fill(1.5, 10) - d = MLJModels.info_dict(model) - @test d[:input_scitype] == MLJBase.Table - @test d[:target_scitype] == AbstractVector{MLJBase.Continuous} - @test d[:name] == "ConstantRegressor" - @test d[:load_path] == "MLJModels.ConstantRegressor" - - d = MLJModels.info_dict(DeterministicConstantRegressor) - @test d[:input_scitype] == MLJBase.Table - @test d[:target_scitype] == AbstractVector{MLJBase.Continuous} - @test d[:name] == "DeterministicConstantRegressor" - @test d[:load_path] == "MLJModels.DeterministicConstantRegressor" + @test MLJBase.input_scitype(model) == MLJBase.Table + @test MLJBase.target_scitype(model) == AbstractVector{MLJBase.Continuous} + @test MLJBase.name(model) == "ConstantRegressor" + @test MLJBase.load_path(model) == "MLJModels.ConstantRegressor" end @testset "Classifier" begin @@ -59,17 +52,11 @@ end fitresult, cache, report = MLJBase.fit(model, 1, X, y, w) d = MLJBase.UnivariateFinite([y[1], y[2], y[4]], [1/3, 1/4, 5/12]) - d = MLJModels.info_dict(model) - @test d[:input_scitype] == MLJBase.Table - @test d[:target_scitype] == AbstractVector{<:MLJBase.Finite} - @test d[:name] == "ConstantClassifier" - @test d[:load_path] == "MLJModels.ConstantClassifier" - - d = MLJModels.info_dict(DeterministicConstantClassifier) - @test d[:input_scitype] == MLJBase.Table - @test d[:target_scitype] == AbstractVector{<:MLJBase.Finite} - @test d[:name] == "DeterministicConstantClassifier" - @test d[:load_path] == "MLJModels.DeterministicConstantClassifier" + @test MLJBase.input_scitype(model) == MLJBase.Table + @test MLJBase.target_scitype(model) == AbstractVector{<:MLJBase.Finite} + @test MLJBase.name(model) == "ConstantClassifier" + @test MLJBase.load_path(model) == "MLJModels.ConstantClassifier" + end end # module diff --git a/test/builtins/Transformers.jl b/test/builtins/Transformers.jl index df6397c..1a3550f 100644 --- a/test/builtins/Transformers.jl +++ b/test/builtins/Transformers.jl @@ -53,7 +53,6 @@ end @test round.(Int, MLJBase.transform(stand, f, [0,4,8])) == [-1.0,1.0,3.0] @test round.(Int, MLJBase.inverse_transform(stand, f, [-1, 1, 3])) == [0, 4, 8] - infos = MLJModels.info_dict(stand) N = 5 rand_char = rand("abcefgh", N) @@ -157,14 
+156,6 @@ end @test Xnew[4] == X[4] @test Xnew[5] == X[5] - infos = MLJModels.info_dict(stand) - - @test infos[:name] == "Standardizer" - @test infos[:input_scitype] == - Union{MLJBase.Table, AbstractVector{<:Continuous}} - @test infos[:output_scitype] == - Union{MLJBase.Table, AbstractVector{<:Continuous}} - # univariate case stand = Standardizer() f, _, _ = MLJBase.fit(stand, 1, [0, 2, 4]) @@ -296,11 +287,6 @@ end e = v - MLJBase.inverse_transform(t, f, MLJBase.transform(t, f, v)) @test sum(abs, e) <= 5000*eps() - infos = MLJModels.info_dict(t) - - @test infos[:name] == "UnivariateBoxCoxTransformer" - @test infos[:input_scitype] == AbstractVector{MLJBase.Continuous} - @test infos[:output_scitype] == AbstractVector{MLJBase.Continuous} end @@ -383,12 +369,6 @@ end f, = MLJBase.fit(t, 0, X) @test_throws Exception MLJBase.transform(t, f, Xmiss) - infos = MLJModels.info_dict(t) - - @test infos[:name] == "OneHotEncoder" - @test infos[:input_scitype] == MLJBase.Table - @test infos[:output_scitype] == MLJBase.Table - # test the work on missing values X = (name = categorical(["Ben", "John", "Mary", "John", missing], ordered=true), height = [1.85, 1.67, 1.5, 1.67, 1.56], diff --git a/test/metadata.jl b/test/metadata.jl index 433bbd6..f5cdfec 100644 --- a/test/metadata.jl +++ b/test/metadata.jl @@ -4,7 +4,7 @@ using Test using MLJModels using MLJBase -METADATA = MLJModels.METADATA +METADATA = MLJModels.REGISTRY_METADATA pca = MLJModels.Handle("PCA", "MultivariateStats") cnst = MLJModels.Handle("ConstantRegressor", "MLJModels") i = MLJModels.info_given_handle(METADATA)[pca] diff --git a/test/registry.jl b/test/registry.jl index 188c7ac..37d17c6 100644 --- a/test/registry.jl +++ b/test/registry.jl @@ -14,7 +14,7 @@ using Suppressor # make a clone of the MLJModel registry, to test `registry_project` method: filename, stream = mktemp() - for line in MLJModels.Registry.registry_project() + for line in MLJModels.registry_project() write(stream, line*"\n") end close(stream) From 26e819c62fb33ef1fd208ee71875d023e4f21a40 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 22 Jul 2025 21:21:08 +1200 Subject: [PATCH 12/22] add forgotten test dep --- Project.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 13e2750..af20ac6 100644 --- a/Project.toml +++ b/Project.toml @@ -33,6 +33,7 @@ CategoricalDistributions = "0.1" Combinatorics = "1.0" Dates = "1" Distances = "0.9,0.10" +Distributed = "1" Distributions = "0.25" InteractiveUtils = "1" LinearAlgebra = "1" @@ -53,6 +54,7 @@ Tables = "0.2,1.0" julia = "1.10" [extras] +Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661" MLJMultivariateStatsInterface = "1b6a4a23-ba22-4f51-9698-8599985d3728" @@ -62,4 +64,4 @@ Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["MLJBase", "MLJDecisionTreeInterface", "MLJMultivariateStatsInterface", "Pkg", "StableRNGs", "Suppressor", "Test"] +test = ["Distributed", "MLJBase", "MLJDecisionTreeInterface", "MLJMultivariateStatsInterface", "Pkg", "StableRNGs", "Suppressor", "Test"] From 07f28a10893d6b4451e148c443c1fe7e69aec575 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Wed, 23 Jul 2025 00:13:52 +1200 Subject: [PATCH 13/22] rm MLJTransforms from the registry --- src/registry/Project.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/registry/Project.toml b/src/registry/Project.toml index df82b28..5a11e7d 100644 --- a/src/registry/Project.toml +++ b/src/registry/Project.toml @@ -25,7 +25,6 @@ MLJScikitLearnInterface = "5ae90465-5518-4432-b9d2-8a1def2f0cab" MLJTSVDInterface = "7fa162e1-0e29-41ca-a6fa-c000ca4e7e7e" MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd" MLJText = "5e27fcf9-6bac-46ba-8580-b5712f3d6387" -MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" Maxnet = "81f79f80-22f2-4e41-ab86-00c11cf0f26f" @@ -35,7 +34,6 @@ OutlierDetectionNeighbors = "51249a0a-cb36-4849-8e04-30c7f8d311bb" OutlierDetectionPython = "2449c660-d36c-460e-a68b-92ab3c865b3e" ParallelKMeans = "42b8e9d4-006b-409a-8472-7f34b3fb58af" PartitionedLS = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f" -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" SIRUS = "cdeec39e-fb35-4959-aadb-a1dd5dede958" SelfOrganizingMaps = "ba4b7379-301a-4be0-bee6-171e4e152787" SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb" From 22ca37ad88bcd50f8915c9a6484e8b0c1f464a77 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 23 Jul 2025 00:16:34 +1200 Subject: [PATCH 14/22] update registry metadata using MLJModelRegistryTools.jl --- src/registry/Metadata.toml | 13599 +++++++++++++++++------------------ 1 file changed, 6780 insertions(+), 6819 deletions(-) diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index 1b8dc09..12e4272 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -1,9144 +1,9105 @@ [BetaML.RandomForestRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Float64\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "BetaML.Bmlj.RandomForestRegressor" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:n_trees, :max_depth, :min_gain, :min_records, :max_features, :splitting_criterion, :β, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "random forest regressor" +":is_supervised" = "`true`" 
+":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct RandomForestRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple Random Forest model for regression with support for Missing data, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `n_trees::Int64`: Number of (decision) trees in the forest [def: `30`]\n * `max_depth::Int64`: The maximum depth the tree is allowed to reach. When this is reached the node is forced to become a leaf [def: `0`, i.e. no limits]\n * `min_gain::Float64`: The minimum information gain to allow for a node's partition [def: `0`]\n * `min_records::Int64`: The minimum number of records a node must holds to consider for a partition of it [def: `2`]\n * `max_features::Int64`: The maximum number of (random) features to consider at each partitioning [def: `0`, i.e. square root of the data dimension]\n * `splitting_criterion::Function`: This is the name of the function to be used to compute the information gain of a specific partition. This is done by measuring the difference betwwen the \"impurity\" of the labels of the parent node with those of the two child nodes, weighted by the respective number of items. [def: `variance`]. Either `variance` or a custom function. It can also be an anonymous function.\n * `β::Float64`: Parameter that regulate the weights of the scoring of each tree, to be (optionally) used in prediction based on the error of the individual trees computed on the records on which trees have not been trained. Higher values favour \"better\" trees, but too high values will cause overfitting [def: `0`, i.e. uniform weigths]\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load RandomForestRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Trees.RandomForestRegressor\n\njulia> model = modelType()\nRandomForestRegressor(\n n_trees = 30, \n max_depth = 0, \n min_gain = 0.0, \n min_records = 2, \n max_features = 0, \n splitting_criterion = BetaML.Utils.variance, \n β = 0.0, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(RandomForestRegressor(n_trees = 30, …), …).\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 25.8433\n 21.6 22.4317\n 34.7 35.5742\n 33.4 33.9233\n ⋮ \n 23.9 24.42\n 22.0 22.4433\n 11.9 15.5833\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "RandomForestRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct RandomForestRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple Random Forest model for regression with support for Missing data, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `n_trees::Int64`: Number of (decision) trees in the forest [def: `30`]\n * `max_depth::Int64`: The maximum depth the tree is allowed to reach. When this is reached the node is forced to become a leaf [def: `0`, i.e. 
no limits]\n * `min_gain::Float64`: The minimum information gain to allow for a node's partition [def: `0`]\n * `min_records::Int64`: The minimum number of records a node must holds to consider for a partition of it [def: `2`]\n * `max_features::Int64`: The maximum number of (random) features to consider at each partitioning [def: `0`, i.e. square root of the data dimension]\n * `splitting_criterion::Function`: This is the name of the function to be used to compute the information gain of a specific partition. This is done by measuring the difference betwwen the \"impurity\" of the labels of the parent node with those of the two child nodes, weighted by the respective number of items. [def: `variance`]. Either `variance` or a custom function. It can also be an anonymous function.\n * `β::Float64`: Parameter that regulate the weights of the scoring of each tree, to be (optionally) used in prediction based on the error of the individual trees computed on the records on which trees have not been trained. Higher values favour \"better\" trees, but too high values will cause overfitting [def: `0`, i.e. uniform weigths]\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load RandomForestRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Trees.RandomForestRegressor\n\njulia> model = modelType()\nRandomForestRegressor(\n n_trees = 30, \n max_depth = 0, \n min_gain = 0.0, \n min_records = 2, \n max_features = 0, \n splitting_criterion = BetaML.Utils.variance, \n β = 0.0, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(RandomForestRegressor(n_trees = 30, …), …).\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 25.8433\n 21.6 22.4317\n 34.7 35.5742\n 33.4 33.9233\n ⋮ \n 23.9 24.42\n 22.0 22.4433\n 11.9 15.5833\n```\n""" -":name" = "RandomForestRegressor" -":human_name" = "random forest regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:n_trees, :max_depth, :min_gain, :min_records, :max_features, :splitting_criterion, :β, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Float64\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [BetaML.GaussianMixtureImputer] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" 
-":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "BetaML.Bmlj.GaussianMixtureImputer" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:n_classes, :initial_probmixtures, :mixtures, :tol, :minimum_variance, :minimum_covariance, :initialisation_strategy, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "gaussian mixture imputer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct GaussianMixtureImputer <: MLJModelInterface.Unsupervised\n```\n\nImpute missing values using a probabilistic approach (Gaussian Mixture Models) fitted using the Expectation-Maximisation algorithm, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `n_classes::Int64`: Number of mixtures (latent classes) to consider [def: 3]\n * `initial_probmixtures::Vector{Float64}`: Initial probabilities of the categorical distribution (n_classes x 1) [default: `[]`]\n * `mixtures::Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}`: An array (of length `n_classes``) of the mixtures to employ (see the [`?GMM`](@ref GMM) module in BetaML). Each mixture object can be provided with or without its parameters (e.g. mean and variance for the gaussian ones). Fully qualified mixtures are useful only if the`initialisation*strategy`parameter is set to \"gived\"` This parameter can also be given symply in term of a _type*. In this case it is automatically extended to a vector of `n_classes``mixtures of the specified type. Note that mixing of different mixture types is not currently supported and that currently implemented mixtures are`SphericalGaussian`,`DiagonalGaussian`and`FullGaussian`. [def:`DiagonalGaussian`]\n * `tol::Float64`: Tolerance to stop the algorithm [default: 10^(-6)]\n * `minimum_variance::Float64`: Minimum variance for the mixtures [default: 0.05]\n * `minimum_covariance::Float64`: Minimum covariance for the mixtures with full covariance matrix [default: 0]. 
This should be set different than minimum_variance.\n * `initialisation_strategy::String`: The computation method of the vector of the initial mixtures. One of the following:\n\n * \"grid\": using a grid approach\n * \"given\": using the mixture provided in the fully qualified `mixtures` parameter\n * \"kmeans\": use first kmeans (itself initialised with a \"grid\" strategy) to set the initial mixture centers [default]\n\n Note that currently \"random\" and \"shuffle\" initialisations are not supported in gmm-based algorithms.\n\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example :\n\n```julia\njulia> using MLJ\n\njulia> X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] |> table ;\n\njulia> modelType = @load GaussianMixtureImputer pkg = \"BetaML\" verbosity=0\nBetaML.Imputation.GaussianMixtureImputer\n\njulia> model = modelType(initialisation_strategy=\"grid\")\nGaussianMixtureImputer(\n n_classes = 3, \n initial_probmixtures = Float64[], \n mixtures = BetaML.GMM.DiagonalGaussian{Float64}[BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing)], \n tol = 1.0e-6, \n minimum_variance = 0.05, \n minimum_covariance = 0.0, \n initialisation_strategy = \"grid\", \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(GaussianMixtureImputer(n_classes = 3, …), …).\nIter. 1: Var. of the post 2.0225921341714286 Log-likelihood -42.96100103213314\n\njulia> X_full = transform(mach) |> MLJ.matrix\n9×2 Matrix{Float64}:\n 1.0 10.5\n 1.5 14.7366\n 1.8 8.0\n 1.7 15.0\n 3.2 40.0\n 2.51842 15.1747\n 3.3 38.0\n 2.47412 -2.3\n 5.2 -2.4\n```\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "GaussianMixtureImputer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct GaussianMixtureImputer <: MLJModelInterface.Unsupervised\n```\n\nImpute missing values using a probabilistic approach (Gaussian Mixture Models) fitted using the Expectation-Maximisation algorithm, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `n_classes::Int64`: Number of mixtures (latent classes) to consider [def: 3]\n * `initial_probmixtures::Vector{Float64}`: Initial probabilities of the categorical distribution (n_classes x 1) [default: `[]`]\n * `mixtures::Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}`: An array (of length `n_classes``) of the mixtures to employ (see the [`?GMM`](@ref GMM) module in BetaML). Each mixture object can be provided with or without its parameters (e.g. mean and variance for the gaussian ones). Fully qualified mixtures are useful only if the`initialisation*strategy`parameter is set to \"gived\"` This parameter can also be given symply in term of a _type*. In this case it is automatically extended to a vector of `n_classes``mixtures of the specified type. 
Note that mixing of different mixture types is not currently supported and that currently implemented mixtures are`SphericalGaussian`,`DiagonalGaussian`and`FullGaussian`. [def:`DiagonalGaussian`]\n * `tol::Float64`: Tolerance to stop the algorithm [default: 10^(-6)]\n * `minimum_variance::Float64`: Minimum variance for the mixtures [default: 0.05]\n * `minimum_covariance::Float64`: Minimum covariance for the mixtures with full covariance matrix [default: 0]. This should be set different than minimum_variance.\n * `initialisation_strategy::String`: The computation method of the vector of the initial mixtures. One of the following:\n\n * \"grid\": using a grid approach\n * \"given\": using the mixture provided in the fully qualified `mixtures` parameter\n * \"kmeans\": use first kmeans (itself initialised with a \"grid\" strategy) to set the initial mixture centers [default]\n\n Note that currently \"random\" and \"shuffle\" initialisations are not supported in gmm-based algorithms.\n\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example :\n\n```julia\njulia> using MLJ\n\njulia> X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] |> table ;\n\njulia> modelType = @load GaussianMixtureImputer pkg = \"BetaML\" verbosity=0\nBetaML.Imputation.GaussianMixtureImputer\n\njulia> model = modelType(initialisation_strategy=\"grid\")\nGaussianMixtureImputer(\n n_classes = 3, \n initial_probmixtures = Float64[], \n mixtures = BetaML.GMM.DiagonalGaussian{Float64}[BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing)], \n tol = 1.0e-6, \n minimum_variance = 0.05, \n minimum_covariance = 0.0, \n initialisation_strategy = \"grid\", \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(GaussianMixtureImputer(n_classes = 3, …), …).\nIter. 1: Var. 
of the post 2.0225921341714286 Log-likelihood -42.96100103213314\n\njulia> X_full = transform(mach) |> MLJ.matrix\n9×2 Matrix{Float64}:\n 1.0 10.5\n 1.5 14.7366\n 1.8 8.0\n 1.7 15.0\n 3.2 40.0\n 2.51842 15.1747\n 3.3 38.0\n 2.47412 -2.3\n 5.2 -2.4\n```\n""" -":name" = "GaussianMixtureImputer" -":human_name" = "gaussian mixture imputer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":fit", ":transform"] -":hyperparameters" = "`(:n_classes, :initial_probmixtures, :mixtures, :tol, :minimum_variance, :minimum_covariance, :initialisation_strategy, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [BetaML.RandomForestClassifier] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Float64\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "BetaML.Bmlj.RandomForestClassifier" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:n_trees, :max_depth, :min_gain, :min_records, :max_features, :splitting_criterion, :β, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "random forest classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct RandomForestClassifier <: 
MLJModelInterface.Probabilistic\n```\n\nA simple Random Forest model for classification with support for Missing data, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `n_trees::Int64`: Number of (decision) trees in the forest [def: `30`]\n * `max_depth::Int64`: The maximum depth the tree is allowed to reach. When this is reached the node is forced to become a leaf [def: `0`, i.e. no limits]\n * `min_gain::Float64`: The minimum information gain to allow for a node's partition [def: `0`]\n * `min_records::Int64`: The minimum number of records a node must hold to be considered for partitioning [def: `2`]\n * `max_features::Int64`: The maximum number of (random) features to consider at each partitioning [def: `0`, i.e. square root of the data dimensions]\n * `splitting_criterion::Function`: This is the name of the function to be used to compute the information gain of a specific partition. This is done by measuring the difference between the \"impurity\" of the labels of the parent node and those of the two child nodes, weighted by the respective number of items. [def: `gini`]. Either `gini`, `entropy` or a custom function. It can also be an anonymous function.\n * `β::Float64`: Parameter that regulates the weights of the scoring of each tree, to be (optionally) used in prediction based on the error of the individual trees computed on the records on which trees have not been trained. Higher values favour \"better\" trees, but too high values will cause overfitting [def: `0`, i.e. uniform weights]\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [default: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load RandomForestClassifier pkg = \"BetaML\" verbosity=0\nBetaML.Trees.RandomForestClassifier\n\njulia> model = modelType()\nRandomForestClassifier(\n n_trees = 30, \n max_depth = 0, \n min_gain = 0.0, \n min_records = 2, \n max_features = 0, \n splitting_criterion = BetaML.Utils.gini, \n β = 0.0, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(RandomForestClassifier(n_trees = 30, …), …).\n\njulia> cat_est = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>0.0, virginica=>0.0)\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>0.0, virginica=>0.0)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.0, versicolor=>0.0, virginica=>1.0)\n UnivariateFinite{Multiclass{3}}(setosa=>0.0, versicolor=>0.0667, virginica=>0.933)\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "RandomForestClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct RandomForestClassifier <: MLJModelInterface.Probabilistic\n```\n\nA simple Random Forest model for classification with support for Missing data, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `n_trees::Int64`\n * `max_depth::Int64`: The maximum depth the tree is allowed to reach. When this is reached the node is forced to become a leaf [def: `0`, i.e. 
no limits]\n * `min_gain::Float64`: The minimum information gain to allow for a node's partition [def: `0`]\n * `min_records::Int64`: The minimum number of records a node must holds to consider for a partition of it [def: `2`]\n * `max_features::Int64`: The maximum number of (random) features to consider at each partitioning [def: `0`, i.e. square root of the data dimensions]\n * `splitting_criterion::Function`: This is the name of the function to be used to compute the information gain of a specific partition. This is done by measuring the difference betwwen the \"impurity\" of the labels of the parent node with those of the two child nodes, weighted by the respective number of items. [def: `gini`]. Either `gini`, `entropy` or a custom function. It can also be an anonymous function.\n * `β::Float64`: Parameter that regulate the weights of the scoring of each tree, to be (optionally) used in prediction based on the error of the individual trees computed on the records on which trees have not been trained. Higher values favour \"better\" trees, but too high values will cause overfitting [def: `0`, i.e. uniform weigths]\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example :\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load RandomForestClassifier pkg = \"BetaML\" verbosity=0\nBetaML.Trees.RandomForestClassifier\n\njulia> model = modelType()\nRandomForestClassifier(\n n_trees = 30, \n max_depth = 0, \n min_gain = 0.0, \n min_records = 2, \n max_features = 0, \n splitting_criterion = BetaML.Utils.gini, \n β = 0.0, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(RandomForestClassifier(n_trees = 30, …), …).\n\njulia> cat_est = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>0.0, virginica=>0.0)\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>0.0, virginica=>0.0)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.0, versicolor=>0.0, virginica=>1.0)\n UnivariateFinite{Multiclass{3}}(setosa=>0.0, versicolor=>0.0667, virginica=>0.933)\n```\n""" -":name" = "RandomForestClassifier" -":human_name" = "random forest classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:n_trees, :max_depth, :min_gain, :min_records, :max_features, :splitting_criterion, :β, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Float64\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" +":transform_scitype" = 
"`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [BetaML.RandomForestImputer] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Vector{Int64}\", \"Union{Nothing, Function}\", \"Int64\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "BetaML.Bmlj.RandomForestImputer" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:n_trees, :max_depth, :min_gain, :min_records, :max_features, :forced_categorical_cols, :splitting_criterion, :recursive_passages, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "random forest imputer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct RandomForestImputer <: MLJModelInterface.Unsupervised\n```\n\nImpute missing values using Random Forests, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `n_trees::Int64`: Number of (decision) trees in the forest [def: `30`]\n * `max_depth::Union{Nothing, Int64}`: The maximum depth the tree is allowed to reach. When this is reached the node is forced to become a leaf [def: `nothing`, i.e. no limits]\n * `min_gain::Float64`: The minimum information gain to allow for a node's partition [def: `0`]\n * `min_records::Int64`: The minimum number of records a node must holds to consider for a partition of it [def: `2`]\n * `max_features::Union{Nothing, Int64}`: The maximum number of (random) features to consider at each partitioning [def: `nothing`, i.e. square root of the data dimension]\n * `forced_categorical_cols::Vector{Int64}`: Specify the positions of the integer columns to treat as categorical instead of cardinal. [Default: empty vector (all numerical cols are treated as cardinal by default and the others as categorical)]\n * `splitting_criterion::Union{Nothing, Function}`: Either `gini`, `entropy` or `variance`. This is the name of the function to be used to compute the information gain of a specific partition. 
This is done by measuring the difference betwwen the \"impurity\" of the labels of the parent node with those of the two child nodes, weighted by the respective number of items. [def: `nothing`, i.e. `gini` for categorical labels (classification task) and `variance` for numerical labels(regression task)]. It can be an anonymous function.\n * `recursive_passages::Int64`: Define the times to go trough the various columns to impute their data. Useful when there are data to impute on multiple columns. The order of the first passage is given by the decreasing number of missing values per column, the other passages are random [default: `1`].\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] |> table ;\n\njulia> modelType = @load RandomForestImputer pkg = \"BetaML\" verbosity=0\nBetaML.Imputation.RandomForestImputer\n\njulia> model = modelType(n_trees=40)\nRandomForestImputer(\n n_trees = 40, \n max_depth = nothing, \n min_gain = 0.0, \n min_records = 2, \n max_features = nothing, \n forced_categorical_cols = Int64[], \n splitting_criterion = nothing, \n recursive_passages = 1, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(RandomForestImputer(n_trees = 40, …), …).\n\njulia> X_full = transform(mach) |> MLJ.matrix\n9×2 Matrix{Float64}:\n 1.0 10.5\n 1.5 10.3909\n 1.8 8.0\n 1.7 15.0\n 3.2 40.0\n 2.88375 8.66125\n 3.3 38.0\n 3.98125 -2.3\n 5.2 -2.4\n```\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "RandomForestImputer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct RandomForestImputer <: MLJModelInterface.Unsupervised\n```\n\nImpute missing values using Random Forests, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `n_trees::Int64`: Number of (decision) trees in the forest [def: `30`]\n * `max_depth::Union{Nothing, Int64}`: The maximum depth the tree is allowed to reach. When this is reached the node is forced to become a leaf [def: `nothing`, i.e. no limits]\n * `min_gain::Float64`: The minimum information gain to allow for a node's partition [def: `0`]\n * `min_records::Int64`: The minimum number of records a node must holds to consider for a partition of it [def: `2`]\n * `max_features::Union{Nothing, Int64}`: The maximum number of (random) features to consider at each partitioning [def: `nothing`, i.e. square root of the data dimension]\n * `forced_categorical_cols::Vector{Int64}`: Specify the positions of the integer columns to treat as categorical instead of cardinal. [Default: empty vector (all numerical cols are treated as cardinal by default and the others as categorical)]\n * `splitting_criterion::Union{Nothing, Function}`: Either `gini`, `entropy` or `variance`. This is the name of the function to be used to compute the information gain of a specific partition. 
This is done by measuring the difference betwwen the \"impurity\" of the labels of the parent node with those of the two child nodes, weighted by the respective number of items. [def: `nothing`, i.e. `gini` for categorical labels (classification task) and `variance` for numerical labels(regression task)]. It can be an anonymous function.\n * `recursive_passages::Int64`: Define the times to go trough the various columns to impute their data. Useful when there are data to impute on multiple columns. The order of the first passage is given by the decreasing number of missing values per column, the other passages are random [default: `1`].\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] |> table ;\n\njulia> modelType = @load RandomForestImputer pkg = \"BetaML\" verbosity=0\nBetaML.Imputation.RandomForestImputer\n\njulia> model = modelType(n_trees=40)\nRandomForestImputer(\n n_trees = 40, \n max_depth = nothing, \n min_gain = 0.0, \n min_records = 2, \n max_features = nothing, \n forced_categorical_cols = Int64[], \n splitting_criterion = nothing, \n recursive_passages = 1, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(RandomForestImputer(n_trees = 40, …), …).\n\njulia> X_full = transform(mach) |> MLJ.matrix\n9×2 Matrix{Float64}:\n 1.0 10.5\n 1.5 10.3909\n 1.8 8.0\n 1.7 15.0\n 3.2 40.0\n 2.88375 8.66125\n 3.3 38.0\n 3.98125 -2.3\n 5.2 -2.4\n```\n""" -":name" = "RandomForestImputer" -":human_name" = "random forest imputer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":fit", ":transform"] -":hyperparameters" = "`(:n_trees, :max_depth, :min_gain, :min_records, :max_features, :forced_categorical_cols, :splitting_criterion, :recursive_passages, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Vector{Int64}\", \"Union{Nothing, Function}\", \"Int64\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" +":is_wrapper" = "`false`" [BetaML.PerceptronClassifier] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Int64\", \"Bool\", 
\"Bool\", \"Bool\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "BetaML.Bmlj.PerceptronClassifier" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:initial_coefficients, :initial_constant, :epochs, :shuffle, :force_origin, :return_mean_hyperplane, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "perceptron classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct PerceptronClassifier <: MLJModelInterface.Probabilistic\n```\n\nThe classical perceptron algorithm using one-vs-all for multiclass, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `initial_coefficients::Union{Nothing, Matrix{Float64}}`: N-classes by D-dimensions matrix of initial linear coefficients [def: `nothing`, i.e. zeros]\n * `initial_constant::Union{Nothing, Vector{Float64}}`: N-classes vector of initial contant terms [def: `nothing`, i.e. zeros]\n * `epochs::Int64`: Maximum number of epochs, i.e. passages trough the whole training sample [def: `1000`]\n * `shuffle::Bool`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `force_origin::Bool`: Whether to force the parameter associated with the constant term to remain zero [def: `false`]\n * `return_mean_hyperplane::Bool`: Whether to return the average hyperplane coefficients instead of the final ones [def: `false`]\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load PerceptronClassifier pkg = \"BetaML\"\n[ Info: For silent loading, specify `verbosity=0`. \nimport BetaML ✔\nBetaML.Perceptron.PerceptronClassifier\n\njulia> model = modelType()\nPerceptronClassifier(\n initial_coefficients = nothing, \n initial_constant = nothing, \n epochs = 1000, \n shuffle = true, \n force_origin = false, \n return_mean_hyperplane = false, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(PerceptronClassifier(initial_coefficients = nothing, …), …).\n*** Avg. 
error after epoch 2 : 0.0 (all elements of the set has been correctly classified)\njulia> est_classes = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>2.53e-34, virginica=>0.0)\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>1.27e-18, virginica=>1.86e-310)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>2.77e-57, versicolor=>1.1099999999999999e-82, virginica=>1.0)\n UnivariateFinite{Multiclass{3}}(setosa=>3.09e-22, versicolor=>4.03e-25, virginica=>1.0)\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "PerceptronClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct PerceptronClassifier <: MLJModelInterface.Probabilistic\n```\n\nThe classical perceptron algorithm using one-vs-all for multiclass, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `initial_coefficients::Union{Nothing, Matrix{Float64}}`: N-classes by D-dimensions matrix of initial linear coefficients [def: `nothing`, i.e. zeros]\n * `initial_constant::Union{Nothing, Vector{Float64}}`: N-classes vector of initial contant terms [def: `nothing`, i.e. zeros]\n * `epochs::Int64`: Maximum number of epochs, i.e. passages trough the whole training sample [def: `1000`]\n * `shuffle::Bool`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `force_origin::Bool`: Whether to force the parameter associated with the constant term to remain zero [def: `false`]\n * `return_mean_hyperplane::Bool`: Whether to return the average hyperplane coefficients instead of the final ones [def: `false`]\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load PerceptronClassifier pkg = \"BetaML\"\n[ Info: For silent loading, specify `verbosity=0`. \nimport BetaML ✔\nBetaML.Perceptron.PerceptronClassifier\n\njulia> model = modelType()\nPerceptronClassifier(\n initial_coefficients = nothing, \n initial_constant = nothing, \n epochs = 1000, \n shuffle = true, \n force_origin = false, \n return_mean_hyperplane = false, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(PerceptronClassifier(initial_coefficients = nothing, …), …).\n*** Avg. 
error after epoch 2 : 0.0 (all elements of the set has been correctly classified)\njulia> est_classes = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>2.53e-34, virginica=>0.0)\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>1.27e-18, virginica=>1.86e-310)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>2.77e-57, versicolor=>1.1099999999999999e-82, virginica=>1.0)\n UnivariateFinite{Multiclass{3}}(setosa=>3.09e-22, versicolor=>4.03e-25, virginica=>1.0)\n```\n""" -":name" = "PerceptronClassifier" -":human_name" = "perceptron classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:initial_coefficients, :initial_constant, :epochs, :shuffle, :force_origin, :return_mean_hyperplane, :rng)`" -":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [BetaML.AutoEncoder] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Float64, Int64}\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"BetaML.Api.AutoTuneMethod\", \"String\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" 
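A note on the `predict_scitype` churn in the classifier hunks above and below: the trait string moves from the `AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}` spelling to the equivalent `AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`. As a minimal sketch of where these registry strings surface for users, assuming an MLJ installation whose registry includes this patch (the variable name `meta` is illustrative only):

```julia
using MLJ

# `info` looks a model up in the registry built from Metadata.toml and
# returns its stored traits, without loading the model-providing package:
meta = info("PerceptronClassifier", pkg="BetaML")

meta.predict_scitype  # AbstractVector{Density{<:Finite}}, matching the "+" entry above
meta.target_scitype   # AbstractVector{<:Finite}
meta.prediction_type  # :probabilistic
```

The same query should work for any entry in this file, e.g. `info("PegasosClassifier", pkg="BetaML")`.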
-":transform_scitype" = "`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "BetaML.Bmlj.AutoEncoder" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:encoded_size, :layers_size, :e_layers, :d_layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :tunemethod, :descr, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "auto encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct AutoEncoder <: MLJModelInterface.Unsupervised\n```\n\nA ready-to use AutoEncoder, from the Beta Machine Learning Toolkit (BetaML) for ecoding and decoding of data using neural networks\n\n# Parameters:\n\n * `encoded_size`: The number of neurons (i.e. dimensions) of the encoded data. If the value is a float it is consiered a percentual (to be rounded) of the dimensionality of the data [def: `0.33`]\n * `layers_size`: Inner layer dimension (i.e. number of neurons). If the value is a float it is considered a percentual (to be rounded) of the dimensionality of the data [def: `nothing` that applies a specific heuristic]. Consider that the underlying neural network is trying to predict multiple values at the same times. Normally this requires many more neurons than a scalar prediction. If `e_layers` or `d_layers` are specified, this parameter is ignored for the respective part.\n * `e_layers`: The layers (vector of `AbstractLayer`s) responsable of the encoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `layers_size`]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `d_layers`: The layers (vector of `AbstractLayer`s) responsable of the decoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `layers_size`]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as (n x d) matrices.\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost` if `loss==squared_cost`, `nothing` otherwise, i.e. use the derivative of the squared cost or autodiff]\n * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `8`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`] See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `tunemethod`: The method - and its parameters - to employ for hyperparameters autotuning. See [`SuccessiveHalvingSearch`](@ref) for the default method. 
To implement automatic hyperparameter tuning during the (first) `fit!` call simply set `autotune=true` and eventually change the default `tunemethod` options (including the parameter ranges, the resources to employ and the loss function to adopt).\n\n * `descr`: An optional title and/or description for this model\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * use `transform` to obtain the encoded data, and `inverse_trasnform` to decode to the original data\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load AutoEncoder pkg = \"BetaML\" verbosity=0;\n\njulia> model = modelType(encoded_size=2,layers_size=10);\n\njulia> mach = machine(model, X)\nuntrained Machine; caches model-specific representations of data\n model: AutoEncoder(e_layers = nothing, …)\n args: \n 1:\tSource @334 ⏎ Table{AbstractVector{Continuous}}\n\njulia> fit!(mach,verbosity=2)\n[ Info: Training machine(AutoEncoder(e_layers = nothing, …), …).\n***\n*** Training for 200 epochs with algorithm BetaML.Nn.ADAM.\nTraining.. \t avg loss on epoch 1 (1): \t 35.48243542158747\nTraining.. \t avg loss on epoch 20 (20): \t 0.07528042222678126\nTraining.. \t avg loss on epoch 40 (40): \t 0.06293071729378613\nTraining.. \t avg loss on epoch 60 (60): \t 0.057035588828991145\nTraining.. \t avg loss on epoch 80 (80): \t 0.056313167754822875\nTraining.. \t avg loss on epoch 100 (100): \t 0.055521461091809436\nTraining the Neural Network... 52%|██████████████████████████████████████ | ETA: 0:00:01Training.. \t avg loss on epoch 120 (120): \t 0.06015206472927942\nTraining.. \t avg loss on epoch 140 (140): \t 0.05536835903285201\nTraining.. \t avg loss on epoch 160 (160): \t 0.05877560142428245\nTraining.. \t avg loss on epoch 180 (180): \t 0.05476302769966953\nTraining.. \t avg loss on epoch 200 (200): \t 0.049240864053557445\nTraining the Neural Network... 100%|█████████████████████████████████████████████████████████████████████████| Time: 0:00:01\nTraining of 200 epoch completed. 
Final epoch error: 0.049240864053557445.\ntrained Machine; caches model-specific representations of data\n model: AutoEncoder(e_layers = nothing, …)\n args: \n 1:\tSource @334 ⏎ Table{AbstractVector{Continuous}}\n\n\njulia> X_latent = transform(mach, X)\n150×2 Matrix{Float64}:\n 7.01701 -2.77285\n 6.50615 -2.9279\n 6.5233 -2.60754\n ⋮ \n 6.70196 -10.6059\n 6.46369 -11.1117\n 6.20212 -10.1323\n\njulia> X_recovered = inverse_transform(mach,X_latent)\n150×4 Matrix{Float64}:\n 5.04973 3.55838 1.43251 0.242215\n 4.73689 3.19985 1.44085 0.295257\n 4.65128 3.25308 1.30187 0.244354\n ⋮ \n 6.50077 2.93602 5.3303 1.87647\n 6.38639 2.83864 5.54395 2.04117\n 6.01595 2.67659 5.03669 1.83234\n\njulia> BetaML.relative_mean_error(MLJ.matrix(X),X_recovered)\n0.03387721261716176\n\n\n```\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "AutoEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct AutoEncoder <: MLJModelInterface.Unsupervised\n```\n\nA ready-to use AutoEncoder, from the Beta Machine Learning Toolkit (BetaML) for ecoding and decoding of data using neural networks\n\n# Parameters:\n\n * `encoded_size`: The number of neurons (i.e. dimensions) of the encoded data. If the value is a float it is consiered a percentual (to be rounded) of the dimensionality of the data [def: `0.33`]\n * `layers_size`: Inner layer dimension (i.e. number of neurons). If the value is a float it is considered a percentual (to be rounded) of the dimensionality of the data [def: `nothing` that applies a specific heuristic]. Consider that the underlying neural network is trying to predict multiple values at the same times. Normally this requires many more neurons than a scalar prediction. If `e_layers` or `d_layers` are specified, this parameter is ignored for the respective part.\n * `e_layers`: The layers (vector of `AbstractLayer`s) responsable of the encoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `layers_size`]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `d_layers`: The layers (vector of `AbstractLayer`s) responsable of the decoding of the data [def: `nothing`, i.e. two dense layers with the inner one of `layers_size`]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as (n x d) matrices.\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost` if `loss==squared_cost`, `nothing` otherwise, i.e. use the derivative of the squared cost or autodiff]\n * `epochs`: Number of epochs, i.e. 
passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `8`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`] See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `tunemethod`: The method - and its parameters - to employ for hyperparameters autotuning. See [`SuccessiveHalvingSearch`](@ref) for the default method. To implement automatic hyperparameter tuning during the (first) `fit!` call simply set `autotune=true` and eventually change the default `tunemethod` options (including the parameter ranges, the resources to employ and the loss function to adopt).\n\n * `descr`: An optional title and/or description for this model\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * use `transform` to obtain the encoded data, and `inverse_trasnform` to decode to the original data\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load AutoEncoder pkg = \"BetaML\" verbosity=0;\n\njulia> model = modelType(encoded_size=2,layers_size=10);\n\njulia> mach = machine(model, X)\nuntrained Machine; caches model-specific representations of data\n model: AutoEncoder(e_layers = nothing, …)\n args: \n 1:\tSource @334 ⏎ Table{AbstractVector{Continuous}}\n\njulia> fit!(mach,verbosity=2)\n[ Info: Training machine(AutoEncoder(e_layers = nothing, …), …).\n***\n*** Training for 200 epochs with algorithm BetaML.Nn.ADAM.\nTraining.. \t avg loss on epoch 1 (1): \t 35.48243542158747\nTraining.. \t avg loss on epoch 20 (20): \t 0.07528042222678126\nTraining.. \t avg loss on epoch 40 (40): \t 0.06293071729378613\nTraining.. \t avg loss on epoch 60 (60): \t 0.057035588828991145\nTraining.. \t avg loss on epoch 80 (80): \t 0.056313167754822875\nTraining.. \t avg loss on epoch 100 (100): \t 0.055521461091809436\nTraining the Neural Network... 52%|██████████████████████████████████████ | ETA: 0:00:01Training.. \t avg loss on epoch 120 (120): \t 0.06015206472927942\nTraining.. \t avg loss on epoch 140 (140): \t 0.05536835903285201\nTraining.. \t avg loss on epoch 160 (160): \t 0.05877560142428245\nTraining.. \t avg loss on epoch 180 (180): \t 0.05476302769966953\nTraining.. \t avg loss on epoch 200 (200): \t 0.049240864053557445\nTraining the Neural Network... 100%|█████████████████████████████████████████████████████████████████████████| Time: 0:00:01\nTraining of 200 epoch completed. 
Final epoch error: 0.049240864053557445.\ntrained Machine; caches model-specific representations of data\n model: AutoEncoder(e_layers = nothing, …)\n args: \n 1:\tSource @334 ⏎ Table{AbstractVector{Continuous}}\n\n\njulia> X_latent = transform(mach, X)\n150×2 Matrix{Float64}:\n 7.01701 -2.77285\n 6.50615 -2.9279\n 6.5233 -2.60754\n ⋮ \n 6.70196 -10.6059\n 6.46369 -11.1117\n 6.20212 -10.1323\n\njulia> X_recovered = inverse_transform(mach,X_latent)\n150×4 Matrix{Float64}:\n 5.04973 3.55838 1.43251 0.242215\n 4.73689 3.19985 1.44085 0.295257\n 4.65128 3.25308 1.30187 0.244354\n ⋮ \n 6.50077 2.93602 5.3303 1.87647\n 6.38639 2.83864 5.54395 2.04117\n 6.01595 2.67659 5.03669 1.83234\n\njulia> BetaML.relative_mean_error(MLJ.matrix(X),X_recovered)\n0.03387721261716176\n\n\n```\n""" -":name" = "AutoEncoder" -":human_name" = "auto encoder" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":fit", ":inverse_transform", ":transform"] -":hyperparameters" = "`(:encoded_size, :layers_size, :e_layers, :d_layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :tunemethod, :descr, :rng)`" -":hyperparameter_types" = "`(\"Union{Float64, Int64}\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"BetaML.Api.AutoTuneMethod\", \"String\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":transform_scitype" = "`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":is_wrapper" = "`false`" [BetaML.DecisionTreeRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "BetaML.Bmlj.DecisionTreeRegressor" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:max_depth, :min_gain, :min_records, :max_features, :splitting_criterion, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "decision tree regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct DecisionTreeRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple Decision Tree model for regression with support for Missing data, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `max_depth::Int64`: The maximum depth the tree is allowed to reach. When this is reached the node is forced to become a leaf [def: `0`, i.e. no limits]\n * `min_gain::Float64`: The minimum information gain to allow for a node's partition [def: `0`]\n * `min_records::Int64`: The minimum number of records a node must holds to consider for a partition of it [def: `2`]\n * `max_features::Int64`: The maximum number of (random) features to consider at each partitioning [def: `0`, i.e. look at all features]\n * `splitting_criterion::Function`: This is the name of the function to be used to compute the information gain of a specific partition. This is done by measuring the difference betwwen the \"impurity\" of the labels of the parent node with those of the two child nodes, weighted by the respective number of items. [def: `variance`]. Either `variance` or a custom function. It can also be an anonymous function.\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load DecisionTreeRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Trees.DecisionTreeRegressor\n\njulia> model = modelType()\nDecisionTreeRegressor(\n max_depth = 0, \n min_gain = 0.0, \n min_records = 2, \n max_features = 0, \n splitting_criterion = BetaML.Utils.variance, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(DecisionTreeRegressor(max_depth = 0, …), …).\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 26.35\n 21.6 21.6\n 34.7 34.8\n ⋮ \n 23.9 23.75\n 22.0 22.2\n 11.9 13.2\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "DecisionTreeRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct DecisionTreeRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple Decision Tree model for regression with support for Missing data, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `max_depth::Int64`: The maximum depth the tree is allowed to reach. When this is reached the node is forced to become a leaf [def: `0`, i.e. 
no limits]\n * `min_gain::Float64`: The minimum information gain to allow for a node's partition [def: `0`]\n * `min_records::Int64`: The minimum number of records a node must holds to consider for a partition of it [def: `2`]\n * `max_features::Int64`: The maximum number of (random) features to consider at each partitioning [def: `0`, i.e. look at all features]\n * `splitting_criterion::Function`: This is the name of the function to be used to compute the information gain of a specific partition. This is done by measuring the difference betwwen the \"impurity\" of the labels of the parent node with those of the two child nodes, weighted by the respective number of items. [def: `variance`]. Either `variance` or a custom function. It can also be an anonymous function.\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load DecisionTreeRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Trees.DecisionTreeRegressor\n\njulia> model = modelType()\nDecisionTreeRegressor(\n max_depth = 0, \n min_gain = 0.0, \n min_records = 2, \n max_features = 0, \n splitting_criterion = BetaML.Utils.variance, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(DecisionTreeRegressor(max_depth = 0, …), …).\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 26.35\n 21.6 21.6\n 34.7 34.8\n ⋮ \n 23.9 23.75\n 22.0 22.2\n 11.9 13.2\n```\n""" -":name" = "DecisionTreeRegressor" -":human_name" = "decision tree regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:max_depth, :min_gain, :min_records, :max_features, :splitting_criterion, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [BetaML.PegasosClassifier] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Function\", \"Float64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "BetaML.Bmlj.PegasosClassifier" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" -":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct PegasosClassifier <: MLJModelInterface.Probabilistic\n```\n\nThe gradient-based linear \"pegasos\" classifier using one-vs-all for multiclass, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `initial_coefficients::Union{Nothing, Matrix{Float64}}`: N-classes by D-dimensions matrix of initial linear coefficients [def: `nothing`, i.e. zeros]\n * `initial_constant::Union{Nothing, Vector{Float64}}`: N-classes vector of initial contant terms [def: `nothing`, i.e. zeros]\n * `learning_rate::Function`: Learning rate [def: (epoch -> 1/sqrt(epoch))]\n * `learning_rate_multiplicative::Float64`: Multiplicative term of the learning rate [def: `0.5`]\n * `epochs::Int64`: Maximum number of epochs, i.e. 
passages trough the whole training sample [def: `1000`]\n * `shuffle::Bool`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `force_origin::Bool`: Whether to force the parameter associated with the constant term to remain zero [def: `false`]\n * `return_mean_hyperplane::Bool`: Whether to return the average hyperplane coefficients instead of the final ones [def: `false`]\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load PegasosClassifier pkg = \"BetaML\" verbosity=0\nBetaML.Perceptron.PegasosClassifier\n\njulia> model = modelType()\nPegasosClassifier(\n initial_coefficients = nothing, \n initial_constant = nothing, \n learning_rate = BetaML.Perceptron.var\"#71#73\"(), \n learning_rate_multiplicative = 0.5, \n epochs = 1000, \n shuffle = true, \n force_origin = false, \n return_mean_hyperplane = false, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> est_classes = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.817, versicolor=>0.153, virginica=>0.0301)\n UnivariateFinite{Multiclass{3}}(setosa=>0.791, versicolor=>0.177, virginica=>0.0318)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.5, virginica=>0.246)\n UnivariateFinite{Multiclass{3}}(setosa=>0.283, versicolor=>0.51, virginica=>0.207)\n```\n""" -":name" = "PegasosClassifier" +":hyperparameters" = "`(:initial_coefficients, :initial_constant, :learning_rate, :learning_rate_multiplicative, :epochs, :shuffle, :force_origin, :return_mean_hyperplane, :rng)`" +":is_pure_julia" = "`true`" ":human_name" = "pegasos classifier" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:initial_coefficients, :initial_constant, :learning_rate, :learning_rate_multiplicative, :epochs, :shuffle, :force_origin, :return_mean_hyperplane, :rng)`" -":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Function\", \"Float64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[BetaML.KMeansClusterer] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" -":package_license" = "MIT" -":load_path" = "BetaML.Bmlj.KMeansClusterer" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":docstring" = """```julia\nmutable struct PegasosClassifier <: MLJModelInterface.Probabilistic\n```\n\nThe gradient-based linear \"pegasos\" classifier using one-vs-all for multiclass, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `initial_coefficients::Union{Nothing, Matrix{Float64}}`: N-classes by D-dimensions matrix of initial linear coefficients [def: `nothing`, i.e. zeros]\n * `initial_constant::Union{Nothing, Vector{Float64}}`: N-classes vector of initial contant terms [def: `nothing`, i.e. zeros]\n * `learning_rate::Function`: Learning rate [def: (epoch -> 1/sqrt(epoch))]\n * `learning_rate_multiplicative::Float64`: Multiplicative term of the learning rate [def: `0.5`]\n * `epochs::Int64`: Maximum number of epochs, i.e. passages trough the whole training sample [def: `1000`]\n * `shuffle::Bool`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `force_origin::Bool`: Whether to force the parameter associated with the constant term to remain zero [def: `false`]\n * `return_mean_hyperplane::Bool`: Whether to return the average hyperplane coefficients instead of the final ones [def: `false`]\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load PegasosClassifier pkg = \"BetaML\" verbosity=0\nBetaML.Perceptron.PegasosClassifier\n\njulia> model = modelType()\nPegasosClassifier(\n initial_coefficients = nothing, \n initial_constant = nothing, \n learning_rate = BetaML.Perceptron.var\"#71#73\"(), \n learning_rate_multiplicative = 0.5, \n epochs = 1000, \n shuffle = true, \n force_origin = false, \n return_mean_hyperplane = false, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> est_classes = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.817, versicolor=>0.153, virginica=>0.0301)\n UnivariateFinite{Multiclass{3}}(setosa=>0.791, versicolor=>0.177, virginica=>0.0318)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.5, virginica=>0.246)\n UnivariateFinite{Multiclass{3}}(setosa=>0.283, versicolor=>0.51, virginica=>0.207)\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "PegasosClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct KMeansClusterer <: MLJModelInterface.Unsupervised\n```\n\nThe classical KMeansClusterer clustering algorithm, from the Beta Machine Learning Toolkit (BetaML).\n\n# Parameters:\n\n * `n_classes::Int64`: Number of classes to discriminate the data [def: 3]\n * `dist::Function`: Function to employ as distance. Default to the Euclidean distance. 
Can be one of the predefined distances (`l1_distance`, `l2_distance`, `l2squared_distance`), `cosine_distance`), any user defined function accepting two vectors and returning a scalar or an anonymous function with the same characteristics. Attention that, contrary to `KMedoidsClusterer`, the `KMeansClusterer` algorithm is not guaranteed to converge with other distances than the Euclidean one.\n * `initialisation_strategy::String`: The computation method of the vector of the initial representatives. One of the following:\n\n * \"random\": randomly in the X space\n * \"grid\": using a grid approach\n * \"shuffle\": selecting randomly within the available points [default]\n * \"given\": using a provided set of initial representatives provided in the `initial_representatives` parameter\n\n * `initial_representatives::Union{Nothing, Matrix{Float64}}`: Provided (K x D) matrix of initial representatives (useful only with `initialisation_strategy=\"given\"`) [default: `nothing`]\n * `rng::Random.AbstractRNG`: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * online fitting (re-fitting with new data) is supported\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load KMeansClusterer pkg = \"BetaML\" verbosity=0\nBetaML.Clustering.KMeansClusterer\n\njulia> model = modelType()\nKMeansClusterer(\n n_classes = 3, \n dist = BetaML.Clustering.var\"#34#36\"(), \n initialisation_strategy = \"shuffle\", \n initial_representatives = nothing, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(KMeansClusterer(n_classes = 3, …), …).\n\njulia> classes_est = predict(mach, X);\n\njulia> hcat(y,classes_est)\n150×2 CategoricalArrays.CategoricalArray{Union{Int64, String},2,UInt32}:\n \"setosa\" 2\n \"setosa\" 2\n \"setosa\" 2\n ⋮ \n \"virginica\" 3\n \"virginica\" 3\n \"virginica\" 1\n```\n""" -":name" = "KMeansClusterer" -":human_name" = "k means clusterer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:n_classes, :dist, :initialisation_strategy, :initial_representatives, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Function\", \"String\", \"Union{Nothing, Matrix{Float64}}\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":fit", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [BetaML.NeuralNetworkRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":output_scitype" = 
"`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":predict_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "BetaML.Bmlj.NeuralNetworkRegressor" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" -":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct NeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. 
See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `fitting_info`]\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be be a *n-records* vector.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load NeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkRegressor\n\njulia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM());\nNeuralNetworkRegressor(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], \n loss = BetaML.Utils.squared_cost, \n dloss = BetaML.Utils.dsquared_cost, \n epochs = 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 30.7726\n 21.6 28.0811\n 34.7 31.3194\n ⋮ \n 23.9 30.9032\n 22.0 29.49\n 11.9 27.2438\n```\n""" -":name" = "NeuralNetworkRegressor" +":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`" +":is_pure_julia" = "`true`" ":human_name" = 
"neural network regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`" -":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[BetaML.MultitargetGaussianMixtureRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractMatrix{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractMatrix{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":docstring" = """```julia\nmutable struct NeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. 
See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `fitting_info`]\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [default: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be a *n-records* vector.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load NeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkRegressor\n\njulia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM());\nNeuralNetworkRegressor(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], \n loss = BetaML.Utils.squared_cost, \n dloss = BetaML.Utils.dsquared_cost, \n epochs = 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 30.7726\n 21.6 28.0811\n 34.7 31.3194\n ⋮ \n 23.9 30.9032\n 22.0 29.49\n 11.9 27.2438\n```\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "NeuralNetworkRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" ":implemented_methods" = [":fit", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" -[BetaML.GaussianMixtureRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" -":package_license" = "MIT" -":load_path" = 
"BetaML.Bmlj.MultitargetGaussianMixtureRegressor" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "NeuralNetworkRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct MultitargetGaussianMixtureRegressor <: MLJModelInterface.Deterministic\n```\n\nA non-linear regressor derived from fitting the data on a probabilistic model (Gaussian Mixture Model). Relatively fast but generally not very precise, except for data with a structure matching the chosen underlying mixture.\n\nThis is the multi-target version of the model. If you want to predict a single label (y), use the MLJ model [`GaussianMixtureRegressor`](@ref).\n\n# Hyperparameters:\n\n * `n_classes::Int64`: Number of mixtures (latent classes) to consider [def: 3]\n * `initial_probmixtures::Vector{Float64}`: Initial probabilities of the categorical distribution (n_classes x 1) [default: `[]`]\n * `mixtures::Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}`: An array (of length `n_classes``) of the mixtures to employ (see the [`?GMM`](@ref GMM) module). Each mixture object can be provided with or without its parameters (e.g. mean and variance for the gaussian ones). Fully qualified mixtures are useful only if the`initialisation*strategy`parameter is set to \"gived\"` This parameter can also be given symply in term of a _type*. In this case it is automatically extended to a vector of `n_classes``mixtures of the specified type. Note that mixing of different mixture types is not currently supported. [def:`[DiagonalGaussian() for i in 1:n_classes]`]\n * `tol::Float64`: Tolerance to stop the algorithm [default: 10^(-6)]\n * `minimum_variance::Float64`: Minimum variance for the mixtures [default: 0.05]\n * `minimum_covariance::Float64`: Minimum covariance for the mixtures with full covariance matrix [default: 0]. This should be set different than minimum_variance (see notes).\n * `initialisation_strategy::String`: The computation method of the vector of the initial mixtures. One of the following:\n\n * \"grid\": using a grid approach\n * \"given\": using the mixture provided in the fully qualified `mixtures` parameter\n * \"kmeans\": use first kmeans (itself initialised with a \"grid\" strategy) to set the initial mixture centers [default]\n\n Note that currently \"random\" and \"shuffle\" initialisations are not supported in gmm-based algorithms.\n\n * `maximum_iterations::Int64`: Maximum number of iterations [def: `typemax(Int64)`, i.e. 
∞]\n * `rng::Random.AbstractRNG`: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> ydouble = hcat(y, y .*2 .+5);\n\njulia> modelType = @load MultitargetGaussianMixtureRegressor pkg = \"BetaML\" verbosity=0\nBetaML.GMM.MultitargetGaussianMixtureRegressor\n\njulia> model = modelType()\nMultitargetGaussianMixtureRegressor(\n n_classes = 3, \n initial_probmixtures = Float64[], \n mixtures = BetaML.GMM.DiagonalGaussian{Float64}[BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing)], \n tol = 1.0e-6, \n minimum_variance = 0.05, \n minimum_covariance = 0.0, \n initialisation_strategy = \"kmeans\", \n maximum_iterations = 9223372036854775807, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, ydouble);\n\njulia> fit!(mach);\n[ Info: Training machine(MultitargetGaussianMixtureRegressor(n_classes = 3, …), …).\nIter. 1: Var. of the post 20.46947926187522 Log-likelihood -23662.72770575145\n\njulia> ŷdouble = predict(mach, X)\n506×2 Matrix{Float64}:\n 23.3358 51.6717\n 23.3358 51.6717\n ⋮ \n 16.6843 38.3686\n 16.6843 38.3686\n```\n""" -":name" = "MultitargetGaussianMixtureRegressor" -":human_name" = "multitarget gaussian mixture regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:n_classes, :initial_probmixtures, :mixtures, :tol, :minimum_variance, :minimum_covariance, :initialisation_strategy, :maximum_iterations, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" -[BetaML.GaussianMixtureRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" 
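A note on the trait changes recorded above: entries like `PegasosClassifier`'s `:predict_scitype` tighten from `Unknown` to a vector of `Finite` densities in this update. A minimal sketch, assuming MLJ and BetaML are installed, of cross-checking such a regenerated entry against what MLJ's registry reports at runtime; `info` is the standard MLJ registry query and the property names below mirror the keys in Metadata.toml:

```julia
# Sketch: query the registry entry regenerated by @update and compare a few
# traits with the values recorded in Metadata.toml above.
using MLJ

meta = info("PegasosClassifier", pkg="BetaML")
meta.prediction_type  # :probabilistic, as recorded in ":prediction_type" above
meta.target_scitype   # AbstractVector{<:Finite}, matching ":target_scitype"
meta.load_path        # "BetaML.Bmlj.PegasosClassifier"
```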
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" +[BetaML.KMeansClusterer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Function\", \"String\", \"Union{Nothing, Matrix{Float64}}\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "BetaML.Bmlj.KMeansClusterer" +":hyperparameters" = "`(:n_classes, :dist, :initialisation_strategy, :initial_representatives, :rng)`" ":is_pure_julia" = "`true`" +":human_name" = "k means clusterer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct KMeansClusterer <: MLJModelInterface.Unsupervised\n```\n\nThe classical KMeansClusterer clustering algorithm, from the Beta Machine Learning Toolkit (BetaML).\n\n# Parameters:\n\n * `n_classes::Int64`: Number of classes to discriminate the data [def: 3]\n * `dist::Function`: Function to employ as distance. Default to the Euclidean distance. Can be one of the predefined distances (`l1_distance`, `l2_distance`, `l2squared_distance`), `cosine_distance`), any user defined function accepting two vectors and returning a scalar or an anonymous function with the same characteristics. Attention that, contrary to `KMedoidsClusterer`, the `KMeansClusterer` algorithm is not guaranteed to converge with other distances than the Euclidean one.\n * `initialisation_strategy::String`: The computation method of the vector of the initial representatives. 
One of the following:\n\n * \"random\": randomly in the X space\n * \"grid\": using a grid approach\n * \"shuffle\": selecting randomly within the available points [default]\n * \"given\": using a provided set of initial representatives provided in the `initial_representatives` parameter\n\n * `initial_representatives::Union{Nothing, Matrix{Float64}}`: Provided (K x D) matrix of initial representatives (useful only with `initialisation_strategy=\"given\"`) [default: `nothing`]\n * `rng::Random.AbstractRNG`: Random Number Generator [default: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * online fitting (re-fitting with new data) is supported\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load KMeansClusterer pkg = \"BetaML\" verbosity=0\nBetaML.Clustering.KMeansClusterer\n\njulia> model = modelType()\nKMeansClusterer(\n n_classes = 3, \n dist = BetaML.Clustering.var\"#34#36\"(), \n initialisation_strategy = \"shuffle\", \n initial_representatives = nothing, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(KMeansClusterer(n_classes = 3, …), …).\n\njulia> classes_est = predict(mach, X);\n\njulia> hcat(y,classes_est)\n150×2 CategoricalArrays.CategoricalArray{Union{Int64, String},2,UInt32}:\n \"setosa\" 2\n \"setosa\" 2\n \"setosa\" 2\n ⋮ \n \"virginica\" 3\n \"virginica\" 3\n \"virginica\" 1\n```\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/sylvaticus/BetaML.jl" ":package_name" = "BetaML" -":package_license" = "MIT" -":load_path" = "BetaML.Bmlj.GaussianMixtureRegressor" +":name" = "KMeansClusterer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":predict", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" + +[BetaML.MultitargetGaussianMixtureRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = 
"BetaML.Bmlj.MultitargetGaussianMixtureRegressor" +":hyperparameters" = "`(:n_classes, :initial_probmixtures, :mixtures, :tol, :minimum_variance, :minimum_covariance, :initialisation_strategy, :maximum_iterations, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "multitarget gaussian mixture regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct MultitargetGaussianMixtureRegressor <: MLJModelInterface.Deterministic\n```\n\nA non-linear regressor derived from fitting the data on a probabilistic model (Gaussian Mixture Model). Relatively fast but generally not very precise, except for data with a structure matching the chosen underlying mixture.\n\nThis is the multi-target version of the model. If you want to predict a single label (y), use the MLJ model [`GaussianMixtureRegressor`](@ref).\n\n# Hyperparameters:\n\n * `n_classes::Int64`: Number of mixtures (latent classes) to consider [def: 3]\n * `initial_probmixtures::Vector{Float64}`: Initial probabilities of the categorical distribution (n_classes x 1) [default: `[]`]\n * `mixtures::Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}`: An array (of length `n_classes``) of the mixtures to employ (see the [`?GMM`](@ref GMM) module). Each mixture object can be provided with or without its parameters (e.g. mean and variance for the gaussian ones). Fully qualified mixtures are useful only if the`initialisation*strategy`parameter is set to \"gived\"` This parameter can also be given symply in term of a _type*. In this case it is automatically extended to a vector of `n_classes``mixtures of the specified type. Note that mixing of different mixture types is not currently supported. [def:`[DiagonalGaussian() for i in 1:n_classes]`]\n * `tol::Float64`: Tolerance to stop the algorithm [default: 10^(-6)]\n * `minimum_variance::Float64`: Minimum variance for the mixtures [default: 0.05]\n * `minimum_covariance::Float64`: Minimum covariance for the mixtures with full covariance matrix [default: 0]. This should be set different than minimum_variance (see notes).\n * `initialisation_strategy::String`: The computation method of the vector of the initial mixtures. One of the following:\n\n * \"grid\": using a grid approach\n * \"given\": using the mixture provided in the fully qualified `mixtures` parameter\n * \"kmeans\": use first kmeans (itself initialised with a \"grid\" strategy) to set the initial mixture centers [default]\n\n Note that currently \"random\" and \"shuffle\" initialisations are not supported in gmm-based algorithms.\n\n * `maximum_iterations::Int64`: Maximum number of iterations [def: `typemax(Int64)`, i.e. 
∞]\n * `rng::Random.AbstractRNG`: Random Number Generator [default: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> ydouble = hcat(y, y .*2 .+5);\n\njulia> modelType = @load MultitargetGaussianMixtureRegressor pkg = \"BetaML\" verbosity=0\nBetaML.GMM.MultitargetGaussianMixtureRegressor\n\njulia> model = modelType()\nMultitargetGaussianMixtureRegressor(\n n_classes = 3, \n initial_probmixtures = Float64[], \n mixtures = BetaML.GMM.DiagonalGaussian{Float64}[BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing)], \n tol = 1.0e-6, \n minimum_variance = 0.05, \n minimum_covariance = 0.0, \n initialisation_strategy = \"kmeans\", \n maximum_iterations = 9223372036854775807, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, ydouble);\n\njulia> fit!(mach);\n[ Info: Training machine(MultitargetGaussianMixtureRegressor(n_classes = 3, …), …).\nIter. 1: Var. of the post 20.46947926187522 Log-likelihood -23662.72770575145\n\njulia> ŷdouble = predict(mach, X)\n506×2 Matrix{Float64}:\n 23.3358 51.6717\n 23.3358 51.6717\n ⋮ \n 16.6843 38.3686\n 16.6843 38.3686\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "MultitargetGaussianMixtureRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct GaussianMixtureRegressor <: MLJModelInterface.Deterministic\n```\n\nA non-linear regressor derived from fitting the data on a probabilistic model (Gaussian Mixture Model). Relatively fast but generally not very precise, except for data with a structure matching the chosen underlying mixture.\n\nThis is the single-target version of the model. If you want to predict several labels (y) at once, use the MLJ model [`MultitargetGaussianMixtureRegressor`](@ref).\n\n# Hyperparameters:\n\n * `n_classes::Int64`: Number of mixtures (latent classes) to consider [def: 3]\n * `initial_probmixtures::Vector{Float64}`: Initial probabilities of the categorical distribution (n_classes x 1) [default: `[]`]\n * `mixtures::Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}`: An array (of length `n_classes``) of the mixtures to employ (see the [`?GMM`](@ref GMM) module). Each mixture object can be provided with or without its parameters (e.g. mean and variance for the gaussian ones). Fully qualified mixtures are useful only if the`initialisation*strategy`parameter is set to \"gived\"` This parameter can also be given symply in term of a _type*. In this case it is automatically extended to a vector of `n_classes``mixtures of the specified type. Note that mixing of different mixture types is not currently supported. [def:`[DiagonalGaussian() for i in 1:n_classes]`]\n * `tol::Float64`: Tolerance to stop the algorithm [default: 10^(-6)]\n * `minimum_variance::Float64`: Minimum variance for the mixtures [default: 0.05]\n * `minimum_covariance::Float64`: Minimum covariance for the mixtures with full covariance matrix [default: 0]. This should be set different than minimum_variance (see notes).\n * `initialisation_strategy::String`: The computation method of the vector of the initial mixtures. 
One of the following:\n\n * \"grid\": using a grid approach\n * \"given\": using the mixture provided in the fully qualified `mixtures` parameter\n * \"kmeans\": use first kmeans (itself initialised with a \"grid\" strategy) to set the initial mixture centers [default]\n\n Note that currently \"random\" and \"shuffle\" initialisations are not supported in gmm-based algorithms.\n\n * `maximum_iterations::Int64`: Maximum number of iterations [def: `typemax(Int64)`, i.e. ∞]\n * `rng::Random.AbstractRNG`: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load GaussianMixtureRegressor pkg = \"BetaML\" verbosity=0\nBetaML.GMM.GaussianMixtureRegressor\n\njulia> model = modelType()\nGaussianMixtureRegressor(\n n_classes = 3, \n initial_probmixtures = Float64[], \n mixtures = BetaML.GMM.DiagonalGaussian{Float64}[BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing)], \n tol = 1.0e-6, \n minimum_variance = 0.05, \n minimum_covariance = 0.0, \n initialisation_strategy = \"kmeans\", \n maximum_iterations = 9223372036854775807, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(GaussianMixtureRegressor(n_classes = 3, …), …).\nIter. 1: Var. of the post 21.74887448784976 Log-likelihood -21687.09917379566\n\njulia> ŷ = predict(mach, X)\n506-element Vector{Float64}:\n 24.703442835305577\n 24.70344283512716\n ⋮\n 17.172486989759676\n 17.172486989759644\n```\n""" -":name" = "GaussianMixtureRegressor" -":human_name" = "gaussian mixture regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:n_classes, :initial_probmixtures, :mixtures, :tol, :minimum_variance, :minimum_covariance, :initialisation_strategy, :maximum_iterations, :rng)`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractMatrix{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractMatrix{<:ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[BetaML.GaussianMixtureRegressor] +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" 
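Most hunks above only reorder lines: `@update` re-serialises every model's trait dictionary, so key order inside each TOML table is not stable between runs. A hedged sketch of comparing two registry snapshots by parsed content rather than by line; the `Metadata.toml.orig` path below is a hypothetical pre-update copy, while the other path is the one named in this patch:

```julia
# Sketch: structural comparison of two Metadata.toml snapshots, ignoring
# the key reordering that dominates this diff.
using TOML

old = TOML.parsefile("Metadata.toml.orig")          # hypothetical pre-@update copy
new = TOML.parsefile("src/registry/Metadata.toml")  # path from this patch

# A header like [BetaML.GaussianMixtureRegressor] parses as nested tables:
gmm_old = old["BetaML"]["GaussianMixtureRegressor"]
gmm_new = new["BetaML"]["GaussianMixtureRegressor"]
println(gmm_old == gmm_new)  # true whenever a hunk is pure reordering

# Keys whose values are new or modified, as opposed to merely moved:
changed = [k for k in keys(gmm_new) if get(gmm_old, k, nothing) != gmm_new[k]]
```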
+":load_path" = "BetaML.Bmlj.GaussianMixtureRegressor" +":hyperparameters" = "`(:n_classes, :initial_probmixtures, :mixtures, :tol, :minimum_variance, :minimum_covariance, :initialisation_strategy, :maximum_iterations, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "gaussian mixture regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct GaussianMixtureRegressor <: MLJModelInterface.Deterministic\n```\n\nA non-linear regressor derived from fitting the data on a probabilistic model (Gaussian Mixture Model). Relatively fast but generally not very precise, except for data with a structure matching the chosen underlying mixture.\n\nThis is the single-target version of the model. If you want to predict several labels (y) at once, use the MLJ model [`MultitargetGaussianMixtureRegressor`](@ref).\n\n# Hyperparameters:\n\n * `n_classes::Int64`: Number of mixtures (latent classes) to consider [def: 3]\n * `initial_probmixtures::Vector{Float64}`: Initial probabilities of the categorical distribution (n_classes x 1) [default: `[]`]\n * `mixtures::Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}`: An array (of length `n_classes``) of the mixtures to employ (see the [`?GMM`](@ref GMM) module). Each mixture object can be provided with or without its parameters (e.g. mean and variance for the gaussian ones). Fully qualified mixtures are useful only if the`initialisation*strategy`parameter is set to \"gived\"` This parameter can also be given symply in term of a _type*. In this case it is automatically extended to a vector of `n_classes``mixtures of the specified type. Note that mixing of different mixture types is not currently supported. [def:`[DiagonalGaussian() for i in 1:n_classes]`]\n * `tol::Float64`: Tolerance to stop the algorithm [default: 10^(-6)]\n * `minimum_variance::Float64`: Minimum variance for the mixtures [default: 0.05]\n * `minimum_covariance::Float64`: Minimum covariance for the mixtures with full covariance matrix [default: 0]. This should be set different than minimum_variance (see notes).\n * `initialisation_strategy::String`: The computation method of the vector of the initial mixtures. One of the following:\n\n * \"grid\": using a grid approach\n * \"given\": using the mixture provided in the fully qualified `mixtures` parameter\n * \"kmeans\": use first kmeans (itself initialised with a \"grid\" strategy) to set the initial mixture centers [default]\n\n Note that currently \"random\" and \"shuffle\" initialisations are not supported in gmm-based algorithms.\n\n * `maximum_iterations::Int64`: Maximum number of iterations [def: `typemax(Int64)`, i.e. 
∞]\n * `rng::Random.AbstractRNG`: Random Number Generator [default: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load GaussianMixtureRegressor pkg = \"BetaML\" verbosity=0\nBetaML.GMM.GaussianMixtureRegressor\n\njulia> model = modelType()\nGaussianMixtureRegressor(\n n_classes = 3, \n initial_probmixtures = Float64[], \n mixtures = BetaML.GMM.DiagonalGaussian{Float64}[BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing)], \n tol = 1.0e-6, \n minimum_variance = 0.05, \n minimum_covariance = 0.0, \n initialisation_strategy = \"kmeans\", \n maximum_iterations = 9223372036854775807, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(GaussianMixtureRegressor(n_classes = 3, …), …).\nIter. 1: Var. of the post 21.74887448784976 Log-likelihood -21687.09917379566\n\njulia> ŷ = predict(mach, X)\n506-element Vector{Float64}:\n 24.703442835305577\n 24.70344283512716\n ⋮\n 17.172486989759676\n 17.172486989759644\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/sylvaticus/BetaML.jl" +":package_name" = "BetaML" +":name" = "GaussianMixtureRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [BetaML.MultitargetNeuralNetworkRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":predict_scitype" = "`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" 
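The two GMM regressors above differ essentially in their `:target_scitype` (a vector versus a matrix of `Continuous`), which is what MLJ matches against a supplied target. A small sketch, assuming the same `@load_boston` data the docstrings use, of making that choice programmatically:

```julia
# Sketch: match a target's scitype against the traits recorded above to
# decide between the single- and multi-target variants.
using MLJ

X, y = @load_boston
ydouble = hcat(y, y .* 2 .+ 5)

scitype(y)        # AbstractVector{Continuous} -> GaussianMixtureRegressor
scitype(ydouble)  # AbstractMatrix{Continuous} -> MultitargetGaussianMixtureRegressor

scitype(ydouble) <: info("MultitargetGaussianMixtureRegressor", pkg="BetaML").target_scitype  # true
```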
-":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "BetaML.Bmlj.MultitargetNeuralNetworkRegressor" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "multitarget neural network regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct MultitargetNeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of multiple dimensional targets.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices.\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `300`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. 
See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `BetaML.fitting_info`]\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [default: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be a *n-records* by *n-dimensions* matrix\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> ydouble = hcat(y, y .*2 .+5);\n\njulia> modelType = @load MultitargetNeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.MultitargetNeuralNetworkRegressor\n\njulia> layers = [BetaML.DenseLayer(12,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,2,f=BetaML.relu)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM(),epochs=500)\nMultitargetNeuralNetworkRegressor(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.2591582523441157 -0.027962845131416225 … 0.16044535560124418 -0.12838827994676857; -0.30381834909561184 0.2405495243851402 … -0.2588144861880588 0.09538577909777807; … ; -0.017320292924711156 -0.14042266424603767 … 0.06366999105841187 -0.13419651752478906; 0.07393079961409338 0.24521350531110264 … 0.04256867886217541 -0.0895506802948175], [0.14249427336553644, 0.24719379413682485, -0.25595911822556566, 0.10034088778965933, -0.017086404878505712, 0.21932184025609347, -0.031413516834861266, -0.12569076082247596, -0.18080140982481183, 0.14551901873323253 … -0.13321995621967364, 0.2436582233332092, 0.0552222336976439, 0.07000814133633904, 0.2280064379660025, -0.28885681475734193, -0.07414214246290696, -0.06783184733650621, -0.055318068046308455, -0.2573488383282579], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.0395424111703751 -0.22531232360829911 … -0.04341228943744482 0.024336206858365517; -0.16481887432946268 0.17798073384748508 … -0.18594039305095766 0.051159225856547474; … ; -0.011639475293705043 -0.02347011206244673 … 0.20508869536159186 -0.1158382446274592; -0.19078069527757857 -0.007487540070740484 … -0.21341165344291158 -0.24158671316310726], [-0.04283623889330032, 0.14924461547060602, -0.17039563392959683, 0.00907774027816255, 0.21738885963113852, -0.06308040225941691, -0.14683286822101105, 0.21726892197970937, 0.19784321784707126, -0.0344988665714947 … -0.23643089430602846, -0.013560425201427584, 0.05323948910726356, -0.04644175812567475, -0.2350400292671211, 0.09628312383424742, 0.07016420995205697, -0.23266392927140334, -0.18823664451487, 0.2304486691429084], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.11504184627266828 0.08601794194664503 … 0.03843129724045469 -0.18417305624127284; 0.10181551438831654 0.13459759904443674 … 0.11094951365942118 -0.1549466590355218; … ; 0.15279817525427697 0.0846661196058916 … -0.07993619892911122 0.07145402617285884; -0.1614160186346092 -0.13032002335149 … -0.12310552194729624 -0.15915773071049827], [-0.03435885900946367, -0.1198543931290306, 0.008454985905194445, -0.17980887188986966, -0.03557204910359624, 0.19125847393334877, -0.10949700778538696, -0.09343206702591, -0.12229583511781811, -0.09123969069220564 … 0.22119233518322862, 0.2053873143308657, 0.12756489387198222, 0.11567243705173319, -0.20982445664020496, 0.1595157838386987, -0.02087331046544119, -0.20556423263489765, -0.1622837764237961, 
-0.019220998739847395], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.25796717031347993 0.17579536633402948 … -0.09992960168785256 -0.09426177454620635; -0.026436330246675632 0.18070899284865127 … -0.19310119102392206 -0.06904005900252091], [0.16133004882307822, -0.3061228721091248], BetaML.Utils.relu, BetaML.Utils.drelu)], \n loss = BetaML.Utils.squared_cost, \n dloss = BetaML.Utils.dsquared_cost, \n epochs = 500, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, ydouble);\n\njulia> fit!(mach);\n\njulia> ŷdouble = predict(mach, X);\n\njulia> hcat(ydouble,ŷdouble)\n506×4 Matrix{Float64}:\n 24.0 53.0 28.4624 62.8607\n 21.6 48.2 22.665 49.7401\n 34.7 74.4 31.5602 67.9433\n 33.4 71.8 33.0869 72.4337\n ⋮ \n 23.9 52.8 23.3573 50.654\n 22.0 49.0 22.1141 48.5926\n 11.9 28.8 19.9639 45.5823\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "MultitargetNeuralNetworkRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct MultitargetNeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of multiple dimensional targets.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices.\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `300`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. 
See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `BetaML.fitting_info`]\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be a *n-records* by *n-dimensions* matrix\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> ydouble = hcat(y, y .*2 .+5);\n\njulia> modelType = @load MultitargetNeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.MultitargetNeuralNetworkRegressor\n\njulia> layers = [BetaML.DenseLayer(12,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,50,f=BetaML.relu),BetaML.DenseLayer(50,2,f=BetaML.relu)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM(),epochs=500)\nMultitargetNeuralNetworkRegressor(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.2591582523441157 -0.027962845131416225 … 0.16044535560124418 -0.12838827994676857; -0.30381834909561184 0.2405495243851402 … -0.2588144861880588 0.09538577909777807; … ; -0.017320292924711156 -0.14042266424603767 … 0.06366999105841187 -0.13419651752478906; 0.07393079961409338 0.24521350531110264 … 0.04256867886217541 -0.0895506802948175], [0.14249427336553644, 0.24719379413682485, -0.25595911822556566, 0.10034088778965933, -0.017086404878505712, 0.21932184025609347, -0.031413516834861266, -0.12569076082247596, -0.18080140982481183, 0.14551901873323253 … -0.13321995621967364, 0.2436582233332092, 0.0552222336976439, 0.07000814133633904, 0.2280064379660025, -0.28885681475734193, -0.07414214246290696, -0.06783184733650621, -0.055318068046308455, -0.2573488383282579], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.0395424111703751 -0.22531232360829911 … -0.04341228943744482 0.024336206858365517; -0.16481887432946268 0.17798073384748508 … -0.18594039305095766 0.051159225856547474; … ; -0.011639475293705043 -0.02347011206244673 … 0.20508869536159186 -0.1158382446274592; -0.19078069527757857 -0.007487540070740484 … -0.21341165344291158 -0.24158671316310726], [-0.04283623889330032, 0.14924461547060602, -0.17039563392959683, 0.00907774027816255, 0.21738885963113852, -0.06308040225941691, -0.14683286822101105, 0.21726892197970937, 0.19784321784707126, -0.0344988665714947 … -0.23643089430602846, -0.013560425201427584, 0.05323948910726356, -0.04644175812567475, -0.2350400292671211, 0.09628312383424742, 0.07016420995205697, -0.23266392927140334, -0.18823664451487, 0.2304486691429084], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.11504184627266828 0.08601794194664503 … 0.03843129724045469 -0.18417305624127284; 0.10181551438831654 0.13459759904443674 … 0.11094951365942118 -0.1549466590355218; … ; 0.15279817525427697 0.0846661196058916 … -0.07993619892911122 0.07145402617285884; -0.1614160186346092 -0.13032002335149 … -0.12310552194729624 -0.15915773071049827], [-0.03435885900946367, -0.1198543931290306, 0.008454985905194445, -0.17980887188986966, -0.03557204910359624, 0.19125847393334877, -0.10949700778538696, -0.09343206702591, -0.12229583511781811, -0.09123969069220564 … 0.22119233518322862, 0.2053873143308657, 0.12756489387198222, 0.11567243705173319, -0.20982445664020496, 0.1595157838386987, -0.02087331046544119, -0.20556423263489765, -0.1622837764237961, 
-0.019220998739847395], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.25796717031347993 0.17579536633402948 … -0.09992960168785256 -0.09426177454620635; -0.026436330246675632 0.18070899284865127 … -0.19310119102392206 -0.06904005900252091], [0.16133004882307822, -0.3061228721091248], BetaML.Utils.relu, BetaML.Utils.drelu)], \n loss = BetaML.Utils.squared_cost, \n dloss = BetaML.Utils.dsquared_cost, \n epochs = 500, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, ydouble);\n\njulia> fit!(mach);\n\njulia> ŷdouble = predict(mach, X);\n\njulia> hcat(ydouble,ŷdouble)\n506×4 Matrix{Float64}:\n 24.0 53.0 28.4624 62.8607\n 21.6 48.2 22.665 49.7401\n 34.7 74.4 31.5602 67.9433\n 33.4 71.8 33.0869 72.4337\n ⋮ \n 23.9 52.8 23.3573 50.654\n 22.0 49.0 22.1141 48.5926\n 11.9 28.8 19.9639 45.5823\n```\n""" -":name" = "MultitargetNeuralNetworkRegressor" -":human_name" = "multitarget neural network regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`" -":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":target_scitype" = "`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [BetaML.DecisionTreeClassifier] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractVector{<:Union{Missing, 
ScientificTypesBase.Finite}}}`\"" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "BetaML.Bmlj.DecisionTreeClassifier" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:max_depth, :min_gain, :min_records, :max_features, :splitting_criterion, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "decision tree classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct DecisionTreeClassifier <: MLJModelInterface.Probabilistic\n```\n\nA simple Decision Tree model for classification with support for Missing data, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `max_depth::Int64`: The maximum depth the tree is allowed to reach. When this is reached the node is forced to become a leaf [def: `0`, i.e. no limits]\n * `min_gain::Float64`: The minimum information gain to allow for a node's partition [def: `0`]\n * `min_records::Int64`: The minimum number of records a node must hold to be considered for partitioning [def: `2`]\n * `max_features::Int64`: The maximum number of (random) features to consider at each partitioning [def: `0`, i.e. look at all features]\n * `splitting_criterion::Function`: The function used to compute the information gain of a specific partition. It is computed by measuring the difference between the \"impurity\" of the labels of the parent node and that of the two child nodes, weighted by the respective number of items. [def: `gini`]. Either `gini`, `entropy` or a custom function. 
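A hedged sketch of passing a non-default criterion (not part of the generated registry entry; it assumes `BetaML.entropy` is exported alongside the `BetaML.Utils.gini` shown in the printout below, as the sentence above suggests):

```julia
using MLJ
import BetaML

DTC = @load DecisionTreeClassifier pkg = "BetaML" verbosity = 0

# Swap the default gini criterion for entropy (assumed exported by BetaML):
model = DTC(splitting_criterion = BetaML.entropy, max_depth = 5)

X, y = @load_iris
mach = fit!(machine(model, X, y))
predict_mode(mach, X)  # point predictions from the probabilistic output
```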
It can also be an anonymous function.\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load DecisionTreeClassifier pkg = \"BetaML\" verbosity=0\nBetaML.Trees.DecisionTreeClassifier\n\njulia> model = modelType()\nDecisionTreeClassifier(\n max_depth = 0, \n min_gain = 0.0, \n min_records = 2, \n max_features = 0, \n splitting_criterion = BetaML.Utils.gini, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(DecisionTreeClassifier(max_depth = 0, …), …).\n\njulia> cat_est = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>0.0, virginica=>0.0)\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>0.0, virginica=>0.0)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.0, versicolor=>0.0, virginica=>1.0)\n UnivariateFinite{Multiclass{3}}(setosa=>0.0, versicolor=>0.0, virginica=>1.0)\n UnivariateFinite{Multiclass{3}}(setosa=>0.0, versicolor=>0.0, virginica=>1.0)\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "DecisionTreeClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct DecisionTreeClassifier <: MLJModelInterface.Probabilistic\n```\n\nA simple Decision Tree model for classification with support for Missing data, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `max_depth::Int64`: The maximum depth the tree is allowed to reach. When this is reached the node is forced to become a leaf [def: `0`, i.e. no limits]\n * `min_gain::Float64`: The minimum information gain to allow for a node's partition [def: `0`]\n * `min_records::Int64`: The minimum number of records a node must holds to consider for a partition of it [def: `2`]\n * `max_features::Int64`: The maximum number of (random) features to consider at each partitioning [def: `0`, i.e. look at all features]\n * `splitting_criterion::Function`: This is the name of the function to be used to compute the information gain of a specific partition. This is done by measuring the difference betwwen the \"impurity\" of the labels of the parent node with those of the two child nodes, weighted by the respective number of items. [def: `gini`]. Either `gini`, `entropy` or a custom function. 
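For model assessment, the probabilistic predictions shown in the example can be scored directly with MLJ's resampling machinery; a minimal sketch (illustrative, not part of the generated registry entry):

```julia
using MLJ

DTC = @load DecisionTreeClassifier pkg = "BetaML" verbosity = 0
X, y = @load_iris

# 6-fold cross-validated log-loss of the probabilistic predictions:
evaluate(DTC(), X, y,
         resampling = CV(nfolds = 6, shuffle = true),
         measure = log_loss)
```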
It can also be an anonymous function.\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load DecisionTreeClassifier pkg = \"BetaML\" verbosity=0\nBetaML.Trees.DecisionTreeClassifier\n\njulia> model = modelType()\nDecisionTreeClassifier(\n max_depth = 0, \n min_gain = 0.0, \n min_records = 2, \n max_features = 0, \n splitting_criterion = BetaML.Utils.gini, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n[ Info: Training machine(DecisionTreeClassifier(max_depth = 0, …), …).\n\njulia> cat_est = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>0.0, virginica=>0.0)\n UnivariateFinite{Multiclass{3}}(setosa=>1.0, versicolor=>0.0, virginica=>0.0)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.0, versicolor=>0.0, virginica=>1.0)\n UnivariateFinite{Multiclass{3}}(setosa=>0.0, versicolor=>0.0, virginica=>1.0)\n UnivariateFinite{Multiclass{3}}(setosa=>0.0, versicolor=>0.0, virginica=>1.0)\n```\n""" -":name" = "DecisionTreeClassifier" -":human_name" = "decision tree classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:max_depth, :min_gain, :min_records, :max_features, :splitting_criterion, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [BetaML.GeneralImputer] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{String, Vector{Int64}}\", \"Any\", \"Union{Bool, Vector{Bool}}\", \"Union{Function, Vector{Function}}\", \"Union{Function, Vector{Function}}\", \"Int64\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "BetaML.Bmlj.GeneralImputer" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:cols_to_impute, :estimator, :missing_supported, :fit_function, :predict_function, :recursive_passages, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "general imputer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct GeneralImputer <: MLJModelInterface.Unsupervised\n```\n\nImpute missing values using arbitrary learning models, from the Beta Machine Learning Toolkit (BetaML).\n\nImpute missing values using a vector (one per column) of arbitrary learning models (classifiers/regressors, not necessarily from BetaML) that implement the interface `m = Model([options])`, `train!(m,X,Y)` and `predict(m,X)`.\n\n# Hyperparameters:\n\n * `cols_to_impute::Union{String, Vector{Int64}}`: Columns in the matrix for which to create an imputation model, i.e. to impute. It can be a vector of column IDs (positions), or the keywords \"auto\" (default) or \"all\". With \"auto\" the model automatically detects the columns with missing data and imputes only them. You may manually specify the columns, or use \"all\" if you want to create an imputation model for those columns during training even when all training data are non-missing, so that the trained model can then be applied to further data with possibly missing values.\n * `estimator::Any`: An estimator model (regressor or classifier), possibly with its options (hyper-parameters), to be used to impute the various columns of the matrix. It can also be a `cols_to_impute`-length vector of different estimators to consider a different estimator for each column (dimension) to impute, for example when some columns are categorical (and will hence require a classifier) and some others are numerical (hence requiring a regressor). [default: `nothing`, i.e. use BetaML random forests, handling classification and regression jobs automatically].\n * `missing_supported::Union{Bool, Vector{Bool}}`: Whether the estimator(s) used to predict the missing data themselves support missing data in the training features (X). If not, when the model for a certain dimension is fitted, dimensions with missing data in the same rows as those where imputation is needed are dropped, and then only non-missing rows in the other remaining dimensions are considered. It can be a vector of boolean values to specify this property for each individual estimator or a single boolean value to apply to all the estimators [default: `false`]\n * `fit_function::Union{Function, Vector{Function}}`: The function used by the estimator(s) to fit the model. It should take as first argument the model itself, as second argument a matrix representing the features, and as third argument a vector representing the labels. 
This parameter is mandatory for non-BetaML estimators and can be a single value or a vector (one per estimator) in case different estimator packages are used. [default: `BetaML.fit!`]\n * `predict_function::Union{Function, Vector{Function}}`: The function used by the estimator(s) to predict the labels. It should take as first argument the model itself and as second argument a matrix representing the features. This parameter is mandatory for non-BetaML estimators and can be a single value or a vector (one per estimator) in case different estimator packages are used. [default: `BetaML.predict`]\n * `recursive_passages::Int64`: Define the number of times to go through the various columns to impute their data. Useful when there are data to impute on multiple columns. The order of the first passage is given by the decreasing number of missing values per column; the other passages are random [default: `1`].\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [default: `Random.GLOBAL_RNG`]. Note that this influences only the specific GeneralImputer code; the individual estimators may have their own rng (or similar) parameter.\n\n# Examples:\n\n * *Using BetaML models*:\n\n```julia\njulia> using MLJ;\njulia> import BetaML # The library from which to get the individual estimators to be used for each column imputation\njulia> X = [\"a\" 8.2;\n \"a\" missing;\n \"a\" 7.8;\n \"b\" 21;\n \"b\" 18;\n \"c\" -0.9;\n missing 20;\n \"c\" -1.8;\n missing -2.3;\n \"c\" -2.4] |> table ;\njulia> modelType = @load GeneralImputer pkg = \"BetaML\" verbosity=0\nBetaML.Imputation.GeneralImputer\njulia> model = modelType(estimator=BetaML.DecisionTreeEstimator(),recursive_passages=2);\njulia> mach = machine(model, X);\njulia> fit!(mach);\n[ Info: Training machine(GeneralImputer(cols_to_impute = auto, …), …).\njulia> X_full = transform(mach) |> MLJ.matrix\n10×2 Matrix{Any}:\n \"a\" 8.2\n \"a\" 8.0\n \"a\" 7.8\n \"b\" 21\n \"b\" 18\n \"c\" -0.9\n \"b\" 20\n \"c\" -1.8\n \"c\" -2.3\n \"c\" -2.4\n```\n\n * *Using third party packages* (in this example `DecisionTree`):\n\n```julia\njulia> using MLJ;\njulia> import DecisionTree # An example of external estimators to be used for each column imputation\njulia> X = [\"a\" 8.2;\n \"a\" missing;\n \"a\" 7.8;\n \"b\" 21;\n \"b\" 18;\n \"c\" -0.9;\n missing 20;\n \"c\" -1.8;\n missing -2.3;\n \"c\" -2.4] |> table ;\njulia> modelType = @load GeneralImputer pkg = \"BetaML\" verbosity=0\nBetaML.Imputation.GeneralImputer\njulia> model = modelType(estimator=[DecisionTree.DecisionTreeClassifier(),DecisionTree.DecisionTreeRegressor()], fit_function=DecisionTree.fit!,predict_function=DecisionTree.predict,recursive_passages=2);\njulia> mach = machine(model, X);\njulia> fit!(mach);\n[ Info: Training machine(GeneralImputer(cols_to_impute = auto, …), …).\njulia> X_full = transform(mach) |> MLJ.matrix\n10×2 Matrix{Any}:\n \"a\" 8.2\n \"a\" 7.51111\n \"a\" 7.8\n \"b\" 21\n \"b\" 18\n \"c\" -0.9\n \"b\" 20\n \"c\" -1.8\n \"c\" -2.3\n \"c\" -2.4\n```\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "GeneralImputer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable 
struct GeneralImputer <: MLJModelInterface.Unsupervised\n```\n\nImpute missing values using arbitrary learning models, from the Beta Machine Learning Toolkit (BetaML).\n\nImpute missing values using a vector (one per column) of arbitrary learning models (classifiers/regressors, not necessarily from BetaML) that implement the interface `m = Model([options])`, `train!(m,X,Y)` and `predict(m,X)`.\n\n# Hyperparameters:\n\n * `cols_to_impute::Union{String, Vector{Int64}}`: Columns in the matrix for which to create an imputation model, i.e. to impute. It can be a vector of columns IDs (positions), or the keywords \"auto\" (default) or \"all\". With \"auto\" the model automatically detects the columns with missing data and impute only them. You may manually specify the columns or use \"all\" if you want to create a imputation model for that columns during training even if all training data are non-missing to apply then the training model to further data with possibly missing values.\n * `estimator::Any`: An entimator model (regressor or classifier), with eventually its options (hyper-parameters), to be used to impute the various columns of the matrix. It can also be a `cols_to_impute`-length vector of different estimators to consider a different estimator for each column (dimension) to impute, for example when some columns are categorical (and will hence require a classifier) and some others are numerical (hence requiring a regressor). [default: `nothing`, i.e. use BetaML random forests, handling classification and regression jobs automatically].\n * `missing_supported::Union{Bool, Vector{Bool}}`: Wheter the estimator(s) used to predict the missing data support itself missing data in the training features (X). If not, when the model for a certain dimension is fitted, dimensions with missing data in the same rows of those where imputation is needed are dropped and then only non-missing rows in the other remaining dimensions are considered. It can be a vector of boolean values to specify this property for each individual estimator or a single booleann value to apply to all the estimators [default: `false`]\n * `fit_function::Union{Function, Vector{Function}}`: The function used by the estimator(s) to fit the model. It should take as fist argument the model itself, as second argument a matrix representing the features, and as third argument a vector representing the labels. This parameter is mandatory for non-BetaML estimators and can be a single value or a vector (one per estimator) in case of different estimator packages used. [default: `BetaML.fit!`]\n * `predict_function::Union{Function, Vector{Function}}`: The function used by the estimator(s) to predict the labels. It should take as fist argument the model itself and as second argument a matrix representing the features. This parameter is mandatory for non-BetaML estimators and can be a single value or a vector (one per estimator) in case of different estimator packages used. [default: `BetaML.predict`]\n * `recursive_passages::Int64`: Define the number of times to go trough the various columns to impute their data. Useful when there are data to impute on multiple columns. The order of the first passage is given by the decreasing number of missing values per column, the other passages are random [default: `1`].\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]. 
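Since several parts of this model are stochastic (for instance the random column order of the later recursive passages), a seeded generator makes runs reproducible; a minimal sketch (illustrative, not part of the generated registry entry; `MersenneTwister` is from Julia's standard `Random` library):

```julia
using MLJ
using Random: MersenneTwister

GI = @load GeneralImputer pkg = "BetaML" verbosity = 0

# Fix the RNG so repeated fits impute identically:
model = GI(recursive_passages = 2, rng = MersenneTwister(123))
```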
Note that this influence only the specific GeneralImputer code, the individual estimators may have their own rng (or similar) parameter.\n\n# Examples :\n\n * *Using BetaML models*:\n\n```julia\njulia> using MLJ;\njulia> import BetaML # The library from which to get the individual estimators to be used for each column imputation\njulia> X = [\"a\" 8.2;\n \"a\" missing;\n \"a\" 7.8;\n \"b\" 21;\n \"b\" 18;\n \"c\" -0.9;\n missing 20;\n \"c\" -1.8;\n missing -2.3;\n \"c\" -2.4] |> table ;\njulia> modelType = @load GeneralImputer pkg = \"BetaML\" verbosity=0\nBetaML.Imputation.GeneralImputer\njulia> model = modelType(estimator=BetaML.DecisionTreeEstimator(),recursive_passages=2);\njulia> mach = machine(model, X);\njulia> fit!(mach);\n[ Info: Training machine(GeneralImputer(cols_to_impute = auto, …), …).\njulia> X_full = transform(mach) |> MLJ.matrix\n10×2 Matrix{Any}:\n \"a\" 8.2\n \"a\" 8.0\n \"a\" 7.8\n \"b\" 21\n \"b\" 18\n \"c\" -0.9\n \"b\" 20\n \"c\" -1.8\n \"c\" -2.3\n \"c\" -2.4\n```\n\n * *Using third party packages* (in this example `DecisionTree`):\n\n```julia\njulia> using MLJ;\njulia> import DecisionTree # An example of external estimators to be used for each column imputation\njulia> X = [\"a\" 8.2;\n \"a\" missing;\n \"a\" 7.8;\n \"b\" 21;\n \"b\" 18;\n \"c\" -0.9;\n missing 20;\n \"c\" -1.8;\n missing -2.3;\n \"c\" -2.4] |> table ;\njulia> modelType = @load GeneralImputer pkg = \"BetaML\" verbosity=0\nBetaML.Imputation.GeneralImputer\njulia> model = modelType(estimator=[DecisionTree.DecisionTreeClassifier(),DecisionTree.DecisionTreeRegressor()], fit_function=DecisionTree.fit!,predict_function=DecisionTree.predict,recursive_passages=2);\njulia> mach = machine(model, X);\njulia> fit!(mach);\n[ Info: Training machine(GeneralImputer(cols_to_impute = auto, …), …).\njulia> X_full = transform(mach) |> MLJ.matrix\n10×2 Matrix{Any}:\n \"a\" 8.2\n \"a\" 7.51111\n \"a\" 7.8\n \"b\" 21\n \"b\" 18\n \"c\" -0.9\n \"b\" 20\n \"c\" -1.8\n \"c\" -2.3\n \"c\" -2.4\n```\n""" -":name" = "GeneralImputer" -":human_name" = "general imputer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":fit", ":transform"] -":hyperparameters" = "`(:cols_to_impute, :estimator, :missing_supported, :fit_function, :predict_function, :recursive_passages, :rng)`" -":hyperparameter_types" = "`(\"Union{String, Vector{Int64}}\", \"Any\", \"Union{Bool, Vector{Bool}}\", \"Union{Function, Vector{Function}}\", \"Union{Function, Vector{Function}}\", \"Int64\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" +":is_wrapper" = "`false`" [BetaML.NeuralNetworkClassifier] -":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Union{Nothing, Vector}\", \"String\", \"Any\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "BetaML.Bmlj.NeuralNetworkClassifier" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :categories, :handle_unknown, :other_categories_name, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "neural network classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct NeuralNetworkClassifier <: MLJModelInterface.Probabilistic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for classification problems.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers. The last \"softmax\" layer is automatically added.\n * `loss`: Loss (cost) function [def: `BetaML.crossentropy`]. Should always assume y and ŷ as matrices.\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dcrossentropy`, i.e. the derivative of the cross-entropy]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. 
See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `BetaML.fitting_info`]\n * `categories`: The categories to represent as columns. [def: `nothing`, i.e. unique training values].\n * `handle_unknown`: How to handle categories not seens in training or not present in the provided `categories` array? \"error\" (default) rises an error, \"infrequent\" adds a specific column for these categories.\n * `other_categories_name`: Which value during prediction to assign to this \"other\" category (i.e. categories not seen on training or not present in the provided `categories` array? [def: `nothing`, i.e. typemax(Int64) for integer vectors and \"other\" for other types]. This setting is active only if `handle_unknown=\"infrequent\"` and in that case it MUST be specified if Y is neither integer or strings\n * `rng`: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be a *n-records* by *n-dimensions* matrix (e.g. a one-hot-encoded data for classification), where the output columns should be interpreted as the probabilities for each categories.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load NeuralNetworkClassifier pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkClassifier\n\njulia> layers = [BetaML.DenseLayer(4,8,f=BetaML.relu),BetaML.DenseLayer(8,8,f=BetaML.relu),BetaML.DenseLayer(8,3,f=BetaML.relu),BetaML.VectorFunctionLayer(3,f=BetaML.softmax)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM())\nNeuralNetworkClassifier(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.376173352338049 0.7029289511758696 -0.5589563304592478 -0.21043274001651874; 0.044758889527899415 0.6687689636685921 0.4584331114653877 0.6820506583840453; … ; -0.26546358457167507 -0.28469736227283804 -0.164225549922154 -0.516785639164486; -0.5146043550684141 -0.0699113265130964 0.14959906603941908 -0.053706860039406834], [0.7003943613125758, -0.23990840466587576, -0.23823126271387746, 0.4018101580410387, 0.2274483050356888, -0.564975060667734, 0.1732063297031089, 0.11880299829896945], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.029467850439546583 0.4074661266592745 … 0.36775675246760053 -0.595524555448422; 0.42455597698371306 -0.2458082732997091 … -0.3324220683462514 0.44439454998610595; … ; -0.2890883863364267 -0.10109249362508033 … -0.0602680568207582 0.18177278845097555; -0.03432587226449335 -0.4301192922760063 … 0.5646018168286626 0.47269177680892693], [0.13777442835428688, 0.5473306726675433, 0.3781939472904011, 0.24021813428130567, -0.0714779477402877, -0.020386373530818958, 0.5465466618404464, -0.40339790713616525], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([0.6565120540082393 0.7139211611842745 … 0.07809812467915389 -0.49346311403373844; -0.4544472987041656 0.6502667641568863 … 0.43634608676548214 0.7213049952968921; 0.41212264783075303 -0.21993289366360613 … 0.25365007887755064 -0.5664469566269569], [-0.6911986792747682, -0.2149343209329364, -0.6347727539063817], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.VectorFunctionLayer{0}(fill(NaN), 3, 3, BetaML.Utils.softmax, BetaML.Utils.dsoftmax, nothing)], \n loss = BetaML.Utils.crossentropy, \n dloss = BetaML.Utils.dcrossentropy, \n epochs 
= 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n categories = nothing, \n handle_unknown = \"error\", \n other_categories_name = nothing, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> classes_est = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.575, versicolor=>0.213, virginica=>0.213)\n UnivariateFinite{Multiclass{3}}(setosa=>0.573, versicolor=>0.213, virginica=>0.213)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.236, versicolor=>0.236, virginica=>0.529)\n UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.254, virginica=>0.492)\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "NeuralNetworkClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct NeuralNetworkClassifier <: MLJModelInterface.Probabilistic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for classification problems.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers. The last \"softmax\" layer is automatically added.\n * `loss`: Loss (cost) function [def: `BetaML.crossentropy`]. Should always assume y and ŷ as matrices.\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dcrossentropy`, i.e. the derivative of the cross-entropy]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `BetaML.fitting_info`]\n * `categories`: The categories to represent as columns. [def: `nothing`, i.e. unique training values].\n * `handle_unknown`: How to handle categories not seens in training or not present in the provided `categories` array? \"error\" (default) rises an error, \"infrequent\" adds a specific column for these categories.\n * `other_categories_name`: Which value during prediction to assign to this \"other\" category (i.e. categories not seen on training or not present in the provided `categories` array? [def: `nothing`, i.e. typemax(Int64) for integer vectors and \"other\" for other types]. 
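A hedged configuration sketch for the unknown-category handling described here (illustrative, not part of the generated registry entry; only hyper-parameter names from this docstring are used):

```julia
using MLJ

NNC = @load NeuralNetworkClassifier pkg = "BetaML" verbosity = 0

# Route categories never seen in training to a dedicated "other" column
# instead of raising an error; with a string target the fallback label
# may be any string (it is mandatory only for non-integer, non-string targets):
model = NNC(handle_unknown = "infrequent", other_categories_name = "other")
```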
This setting is active only if `handle_unknown=\"infrequent\"` and in that case it MUST be specified if Y is neither integer or strings\n * `rng`: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be a *n-records* by *n-dimensions* matrix (e.g. a one-hot-encoded data for classification), where the output columns should be interpreted as the probabilities for each categories.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load NeuralNetworkClassifier pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkClassifier\n\njulia> layers = [BetaML.DenseLayer(4,8,f=BetaML.relu),BetaML.DenseLayer(8,8,f=BetaML.relu),BetaML.DenseLayer(8,3,f=BetaML.relu),BetaML.VectorFunctionLayer(3,f=BetaML.softmax)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM())\nNeuralNetworkClassifier(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.376173352338049 0.7029289511758696 -0.5589563304592478 -0.21043274001651874; 0.044758889527899415 0.6687689636685921 0.4584331114653877 0.6820506583840453; … ; -0.26546358457167507 -0.28469736227283804 -0.164225549922154 -0.516785639164486; -0.5146043550684141 -0.0699113265130964 0.14959906603941908 -0.053706860039406834], [0.7003943613125758, -0.23990840466587576, -0.23823126271387746, 0.4018101580410387, 0.2274483050356888, -0.564975060667734, 0.1732063297031089, 0.11880299829896945], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.029467850439546583 0.4074661266592745 … 0.36775675246760053 -0.595524555448422; 0.42455597698371306 -0.2458082732997091 … -0.3324220683462514 0.44439454998610595; … ; -0.2890883863364267 -0.10109249362508033 … -0.0602680568207582 0.18177278845097555; -0.03432587226449335 -0.4301192922760063 … 0.5646018168286626 0.47269177680892693], [0.13777442835428688, 0.5473306726675433, 0.3781939472904011, 0.24021813428130567, -0.0714779477402877, -0.020386373530818958, 0.5465466618404464, -0.40339790713616525], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([0.6565120540082393 0.7139211611842745 … 0.07809812467915389 -0.49346311403373844; -0.4544472987041656 0.6502667641568863 … 0.43634608676548214 0.7213049952968921; 0.41212264783075303 -0.21993289366360613 … 0.25365007887755064 -0.5664469566269569], [-0.6911986792747682, -0.2149343209329364, -0.6347727539063817], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.VectorFunctionLayer{0}(fill(NaN), 3, 3, BetaML.Utils.softmax, BetaML.Utils.dsoftmax, nothing)], \n loss = BetaML.Utils.crossentropy, \n dloss = BetaML.Utils.dcrossentropy, \n epochs = 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n categories = nothing, \n handle_unknown = \"error\", \n other_categories_name = nothing, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> classes_est = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.575, versicolor=>0.213, virginica=>0.213)\n UnivariateFinite{Multiclass{3}}(setosa=>0.573, versicolor=>0.213, virginica=>0.213)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.236, versicolor=>0.236, virginica=>0.529)\n UnivariateFinite{Multiclass{3}}(setosa=>0.254, versicolor=>0.254, virginica=>0.492)\n```\n""" -":name" = 
"NeuralNetworkClassifier" -":human_name" = "neural network classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :categories, :handle_unknown, :other_categories_name, :rng)`" -":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Union{Nothing, Vector}\", \"String\", \"Any\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [BetaML.SimpleImputer] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Function\", \"Union{Nothing, Int64}\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "BetaML.Bmlj.SimpleImputer" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:statistic, :norm)`" +":is_pure_julia" = "`true`" +":human_name" = "simple imputer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct SimpleImputer <: MLJModelInterface.Unsupervised\n```\n\nImpute missing values using feature (column) mean, 
with optional record normalisation (using l-`norm` norms), from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `statistic::Function`: The descriptive statistic of the column (feature) to use as imputed value [def: `mean`]\n * `norm::Union{Nothing, Int64}`: Normalise the feature mean by l-`norm` norm of the records [default: `nothing`]. Use it (e.g. `norm=1` to use the l-1 norm) if the records are highly heterogeneous (e.g. quantity exports of different countries).\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] |> table ;\n\njulia> modelType = @load SimpleImputer pkg = \"BetaML\" verbosity=0\nBetaML.Imputation.SimpleImputer\n\njulia> model = modelType(norm=1)\nSimpleImputer(\n statistic = Statistics.mean, \n norm = 1)\n\njulia> mach = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(SimpleImputer(statistic = mean, …), …).\n\njulia> X_full = transform(mach) |> MLJ.matrix\n9×2 Matrix{Float64}:\n 1.0 10.5\n 1.5 0.295466\n 1.8 8.0\n 1.7 15.0\n 3.2 40.0\n 0.280952 1.69524\n 3.3 38.0\n 0.0750839 -2.3\n 5.2 -2.4\n```\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "SimpleImputer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct SimpleImputer <: MLJModelInterface.Unsupervised\n```\n\nImpute missing values using feature (column) mean, with optional record normalisation (using l-`norm` norms), from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `statistic::Function`: The descriptive statistic of the column (feature) to use as imputed value [def: `mean`]\n * `norm::Union{Nothing, Int64}`: Normalise the feature mean by l-`norm` norm of the records [default: `nothing`]. Use it (e.g. `norm=1` to use the l-1 norm) if the records are highly heterogeneus (e.g. 
quantity exports of different countries).\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X = [1 10.5;1.5 missing; 1.8 8; 1.7 15; 3.2 40; missing missing; 3.3 38; missing -2.3; 5.2 -2.4] |> table ;\n\njulia> modelType = @load SimpleImputer pkg = \"BetaML\" verbosity=0\nBetaML.Imputation.SimpleImputer\n\njulia> model = modelType(norm=1)\nSimpleImputer(\n statistic = Statistics.mean, \n norm = 1)\n\njulia> mach = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(SimpleImputer(statistic = mean, …), …).\n\njulia> X_full = transform(mach) |> MLJ.matrix\n9×2 Matrix{Float64}:\n 1.0 10.5\n 1.5 0.295466\n 1.8 8.0\n 1.7 15.0\n 3.2 40.0\n 0.280952 1.69524\n 3.3 38.0\n 0.0750839 -2.3\n 5.2 -2.4\n```\n""" -":name" = "SimpleImputer" -":human_name" = "simple imputer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":fit", ":transform"] -":hyperparameters" = "`(:statistic, :norm)`" -":hyperparameter_types" = "`(\"Function\", \"Union{Nothing, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [BetaML.GaussianMixtureClusterer] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" -":target_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"AbstractVector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "BetaML.Bmlj.GaussianMixtureClusterer" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:n_classes, :initial_probmixtures, :mixtures, :tol, :minimum_variance, :minimum_covariance, :initialisation_strategy, :maximum_iterations, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "gaussian mixture clusterer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct GaussianMixtureClusterer <: MLJModelInterface.Unsupervised\n```\n\nAn Expectation-Maximisation clustering algorithm with customisable mixtures, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `n_classes::Int64`: Number of mixtures (latent classes) to consider [def: 3]\n * `initial_probmixtures::AbstractVector{Float64}`: Initial probabilities of the categorical distribution (n_classes x 1) [default: `[]`]\n * `mixtures::Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}`: An array (of length `n_classes`) of the mixtures to employ (see the [`?GMM`](@ref GMM) module). Each mixture object can be provided with or without its parameters (e.g. mean and variance for the gaussian ones). Fully qualified mixtures are useful only if the `initialisation_strategy` parameter is set to \"given\". This parameter can also be given simply in terms of a *type*. In this case it is automatically extended to a vector of `n_classes` mixtures of the specified type. Note that mixing of different mixture types is not currently supported. [def: `[DiagonalGaussian() for i in 1:n_classes]`]\n * `tol::Float64`: Tolerance to stop the algorithm [default: 10^(-6)]\n * `minimum_variance::Float64`: Minimum variance for the mixtures [default: 0.05]\n * `minimum_covariance::Float64`: Minimum covariance for the mixtures with full covariance matrix [default: 0]. This should be set differently from minimum_variance (see notes).\n * `initialisation_strategy::String`: The computation method of the vector of the initial mixtures. One of the following:\n\n * \"grid\": using a grid approach\n * \"given\": using the mixture provided in the fully qualified `mixtures` parameter\n * \"kmeans\": first use kmeans (itself initialised with a \"grid\" strategy) to set the initial mixture centers [default]\n\n Note that currently \"random\" and \"shuffle\" initialisations are not supported in gmm-based algorithms.\n * `maximum_iterations::Int64`: Maximum number of iterations [def: `typemax(Int64)`, i.e. ∞]\n * `rng::Random.AbstractRNG`: Random Number Generator [default: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\n\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load GaussianMixtureClusterer pkg = \"BetaML\" verbosity=0\nBetaML.GMM.GaussianMixtureClusterer\n\njulia> model = modelType()\nGaussianMixtureClusterer(\n n_classes = 3, \n initial_probmixtures = Float64[], \n mixtures = BetaML.GMM.DiagonalGaussian{Float64}[BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing)], \n tol = 1.0e-6, \n minimum_variance = 0.05, \n minimum_covariance = 0.0, \n initialisation_strategy = \"kmeans\", \n maximum_iterations = 9223372036854775807, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(GaussianMixtureClusterer(n_classes = 3, …), …).\nIter. 1: Var. 
of the post 10.800150114964184 Log-likelihood -650.0186451891216\n\njulia> classes_est = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, Int64, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(1=>1.0, 2=>4.17e-15, 3=>2.1900000000000003e-31)\n UnivariateFinite{Multiclass{3}}(1=>1.0, 2=>1.25e-13, 3=>5.87e-31)\n UnivariateFinite{Multiclass{3}}(1=>1.0, 2=>4.5e-15, 3=>1.55e-32)\n UnivariateFinite{Multiclass{3}}(1=>1.0, 2=>6.93e-14, 3=>3.37e-31)\n ⋮\n UnivariateFinite{Multiclass{3}}(1=>5.39e-25, 2=>0.0167, 3=>0.983)\n UnivariateFinite{Multiclass{3}}(1=>7.5e-29, 2=>0.000106, 3=>1.0)\n UnivariateFinite{Multiclass{3}}(1=>1.6e-20, 2=>0.594, 3=>0.406)\n```\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "GaussianMixtureClusterer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct GaussianMixtureClusterer <: MLJModelInterface.Unsupervised\n```\n\nA Expectation-Maximisation clustering algorithm with customisable mixtures, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `n_classes::Int64`: Number of mixtures (latent classes) to consider [def: 3]\n * `initial_probmixtures::AbstractVector{Float64}`: Initial probabilities of the categorical distribution (n_classes x 1) [default: `[]`]\n * `mixtures::Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}`: An array (of length `n_classes`) of the mixtures to employ (see the [`?GMM`](@ref GMM) module). Each mixture object can be provided with or without its parameters (e.g. mean and variance for the gaussian ones). Fully qualified mixtures are useful only if the `initialisation_strategy` parameter is set to \"gived\". This parameter can also be given symply in term of a *type*. In this case it is automatically extended to a vector of `n_classes` mixtures of the specified type. Note that mixing of different mixture types is not currently supported. [def: `[DiagonalGaussian() for i in 1:n_classes]`]\n * `tol::Float64`: Tolerance to stop the algorithm [default: 10^(-6)]\n * `minimum_variance::Float64`: Minimum variance for the mixtures [default: 0.05]\n * `minimum_covariance::Float64`: Minimum covariance for the mixtures with full covariance matrix [default: 0]. This should be set different than minimum_variance (see notes).\n * `initialisation_strategy::String`: The computation method of the vector of the initial mixtures. One of the following:\n\n * \"grid\": using a grid approach\n * \"given\": using the mixture provided in the fully qualified `mixtures` parameter\n * \"kmeans\": use first kmeans (itself initialised with a \"grid\" strategy) to set the initial mixture centers [default]\n\n Note that currently \"random\" and \"shuffle\" initialisations are not supported in gmm-based algorithms.\n * `maximum_iterations::Int64`: Maximum number of iterations [def: `typemax(Int64)`, i.e. 
∞]\n * `rng::Random.AbstractRNG`: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\n\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load GaussianMixtureClusterer pkg = \"BetaML\" verbosity=0\nBetaML.GMM.GaussianMixtureClusterer\n\njulia> model = modelType()\nGaussianMixtureClusterer(\n n_classes = 3, \n initial_probmixtures = Float64[], \n mixtures = BetaML.GMM.DiagonalGaussian{Float64}[BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing), BetaML.GMM.DiagonalGaussian{Float64}(nothing, nothing)], \n tol = 1.0e-6, \n minimum_variance = 0.05, \n minimum_covariance = 0.0, \n initialisation_strategy = \"kmeans\", \n maximum_iterations = 9223372036854775807, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(GaussianMixtureClusterer(n_classes = 3, …), …).\nIter. 1: Var. of the post 10.800150114964184 Log-likelihood -650.0186451891216\n\njulia> classes_est = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, Int64, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(1=>1.0, 2=>4.17e-15, 3=>2.1900000000000003e-31)\n UnivariateFinite{Multiclass{3}}(1=>1.0, 2=>1.25e-13, 3=>5.87e-31)\n UnivariateFinite{Multiclass{3}}(1=>1.0, 2=>4.5e-15, 3=>1.55e-32)\n UnivariateFinite{Multiclass{3}}(1=>1.0, 2=>6.93e-14, 3=>3.37e-31)\n ⋮\n UnivariateFinite{Multiclass{3}}(1=>5.39e-25, 2=>0.0167, 3=>0.983)\n UnivariateFinite{Multiclass{3}}(1=>7.5e-29, 2=>0.000106, 3=>1.0)\n UnivariateFinite{Multiclass{3}}(1=>1.6e-20, 2=>0.594, 3=>0.406)\n```\n""" -":name" = "GaussianMixtureClusterer" -":human_name" = "gaussian mixture clusterer" -":is_supervised" = "`false`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:n_classes, :initial_probmixtures, :mixtures, :tol, :minimum_variance, :minimum_covariance, :initialisation_strategy, :maximum_iterations, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"AbstractVector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" +":transform_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" +":is_wrapper" = "`false`" [BetaML.KernelPerceptronClassifier] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Function\", \"Int64\", \"Union{Nothing, 
Vector{Vector{Int64}}}\", \"Bool\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "BetaML.Bmlj.KernelPerceptronClassifier" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:kernel, :epochs, :initial_errors, :shuffle, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "kernel perceptron classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct KernelPerceptronClassifier <: MLJModelInterface.Probabilistic\n```\n\nThe kernel perceptron algorithm using one-vs-one for multiclass, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `kernel::Function`: Kernel function to employ. See `?radial_kernel` or `?polynomial_kernel` (once loaded the BetaML package) for details or check `?BetaML.Utils` to verify if other kernels are defined (you can alsways define your own kernel) [def: [`radial_kernel`](@ref)]\n * `epochs::Int64`: Maximum number of epochs, i.e. passages trough the whole training sample [def: `100`]\n * `initial_errors::Union{Nothing, Vector{Vector{Int64}}}`: Initial distribution of the number of errors errors [def: `nothing`, i.e. zeros]. If provided, this should be a nModels-lenght vector of nRecords integer values vectors , where nModels is computed as `(n_classes * (n_classes - 1)) / 2`\n * `shuffle::Bool`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load KernelPerceptronClassifier pkg = \"BetaML\"\n[ Info: For silent loading, specify `verbosity=0`. 
\nimport BetaML ✔\nBetaML.Perceptron.KernelPerceptronClassifier\n\njulia> model = modelType()\nKernelPerceptronClassifier(\n kernel = BetaML.Utils.radial_kernel, \n epochs = 100, \n initial_errors = nothing, \n shuffle = true, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> est_classes = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.665, versicolor=>0.245, virginica=>0.09)\n UnivariateFinite{Multiclass{3}}(setosa=>0.665, versicolor=>0.245, virginica=>0.09)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.09, versicolor=>0.245, virginica=>0.665)\n UnivariateFinite{Multiclass{3}}(setosa=>0.09, versicolor=>0.665, virginica=>0.245)\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "KernelPerceptronClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct KernelPerceptronClassifier <: MLJModelInterface.Probabilistic\n```\n\nThe kernel perceptron algorithm using one-vs-one for multiclass, from the Beta Machine Learning Toolkit (BetaML).\n\n# Hyperparameters:\n\n * `kernel::Function`: Kernel function to employ. See `?radial_kernel` or `?polynomial_kernel` (once loaded the BetaML package) for details or check `?BetaML.Utils` to verify if other kernels are defined (you can alsways define your own kernel) [def: [`radial_kernel`](@ref)]\n * `epochs::Int64`: Maximum number of epochs, i.e. passages trough the whole training sample [def: `100`]\n * `initial_errors::Union{Nothing, Vector{Vector{Int64}}}`: Initial distribution of the number of errors errors [def: `nothing`, i.e. zeros]. If provided, this should be a nModels-lenght vector of nRecords integer values vectors , where nModels is computed as `(n_classes * (n_classes - 1)) / 2`\n * `shuffle::Bool`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `rng::Random.AbstractRNG`: A Random Number Generator to be used in stochastic parts of the code [deafult: `Random.GLOBAL_RNG`]\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load KernelPerceptronClassifier pkg = \"BetaML\"\n[ Info: For silent loading, specify `verbosity=0`. 
\nimport BetaML ✔\nBetaML.Perceptron.KernelPerceptronClassifier\n\njulia> model = modelType()\nKernelPerceptronClassifier(\n kernel = BetaML.Utils.radial_kernel, \n epochs = 100, \n initial_errors = nothing, \n shuffle = true, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> est_classes = predict(mach, X)\n150-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{3}, String, UInt8, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.665, versicolor=>0.245, virginica=>0.09)\n UnivariateFinite{Multiclass{3}}(setosa=>0.665, versicolor=>0.245, virginica=>0.09)\n ⋮\n UnivariateFinite{Multiclass{3}}(setosa=>0.09, versicolor=>0.245, virginica=>0.665)\n UnivariateFinite{Multiclass{3}}(setosa=>0.09, versicolor=>0.665, virginica=>0.245)\n```\n""" -":name" = "KernelPerceptronClassifier" -":human_name" = "kernel perceptron classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:kernel, :epochs, :initial_errors, :shuffle, :rng)`" -":hyperparameter_types" = "`(\"Function\", \"Int64\", \"Union{Nothing, Vector{Vector{Int64}}}\", \"Bool\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [BetaML.KMedoidsClusterer] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Function\", \"String\", \"Union{Nothing, Matrix{Float64}}\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "BetaML" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" +":prediction_type" = 
":unknown" ":load_path" = "BetaML.Bmlj.KMedoidsClusterer" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameters" = "`(:n_classes, :dist, :initialisation_strategy, :initial_representatives, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "k medoids clusterer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct KMedoidsClusterer <: MLJModelInterface.Unsupervised\n```\n\n# Parameters:\n\n * `n_classes::Int64`: Number of classes to discriminate the data [def: 3]\n * `dist::Function`: Function to employ as distance. Default to the Euclidean distance. Can be one of the predefined distances (`l1_distance`, `l2_distance`, `l2squared_distance`), `cosine_distance`), any user defined function accepting two vectors and returning a scalar or an anonymous function with the same characteristics.\n * `initialisation_strategy::String`: The computation method of the vector of the initial representatives. One of the following:\n\n * \"random\": randomly in the X space\n * \"grid\": using a grid approach\n * \"shuffle\": selecting randomly within the available points [default]\n * \"given\": using a provided set of initial representatives provided in the `initial_representatives` parameter\n\n * `initial_representatives::Union{Nothing, Matrix{Float64}}`: Provided (K x D) matrix of initial representatives (useful only with `initialisation_strategy=\"given\"`) [default: `nothing`]\n * `rng::Random.AbstractRNG`: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\nThe K-medoids clustering algorithm with customisable distance function, from the Beta Machine Learning Toolkit (BetaML).\n\nSimilar to K-Means, but the \"representatives\" (the cetroids) are guaranteed to be one of the training points. The algorithm work with any arbitrary distance measure.\n\n# Notes:\n\n * data must be numerical\n * online fitting (re-fitting with new data) is supported\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load KMedoidsClusterer pkg = \"BetaML\" verbosity=0\nBetaML.Clustering.KMedoidsClusterer\n\njulia> model = modelType()\nKMedoidsClusterer(\n n_classes = 3, \n dist = BetaML.Clustering.var\"#39#41\"(), \n initialisation_strategy = \"shuffle\", \n initial_representatives = nothing, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(KMedoidsClusterer(n_classes = 3, …), …).\n\njulia> classes_est = predict(mach, X);\n\njulia> hcat(y,classes_est)\n150×2 CategoricalArrays.CategoricalArray{Union{Int64, String},2,UInt32}:\n \"setosa\" 3\n \"setosa\" 3\n \"setosa\" 3\n ⋮ \n \"virginica\" 1\n \"virginica\" 1\n \"virginica\" 2\n```\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "BetaML" +":name" = "KMedoidsClusterer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```julia\nmutable struct KMedoidsClusterer <: MLJModelInterface.Unsupervised\n```\n\n# Parameters:\n\n * `n_classes::Int64`: Number of classes to discriminate the data [def: 3]\n * `dist::Function`: Function to employ as distance. Default to the Euclidean distance. 
Can be one of the predefined distances (`l1_distance`, `l2_distance`, `l2squared_distance`), `cosine_distance`), any user defined function accepting two vectors and returning a scalar or an anonymous function with the same characteristics.\n * `initialisation_strategy::String`: The computation method of the vector of the initial representatives. One of the following:\n\n * \"random\": randomly in the X space\n * \"grid\": using a grid approach\n * \"shuffle\": selecting randomly within the available points [default]\n * \"given\": using a provided set of initial representatives provided in the `initial_representatives` parameter\n\n * `initial_representatives::Union{Nothing, Matrix{Float64}}`: Provided (K x D) matrix of initial representatives (useful only with `initialisation_strategy=\"given\"`) [default: `nothing`]\n * `rng::Random.AbstractRNG`: Random Number Generator [deafult: `Random.GLOBAL_RNG`]\n\nThe K-medoids clustering algorithm with customisable distance function, from the Beta Machine Learning Toolkit (BetaML).\n\nSimilar to K-Means, but the \"representatives\" (the cetroids) are guaranteed to be one of the training points. The algorithm work with any arbitrary distance measure.\n\n# Notes:\n\n * data must be numerical\n * online fitting (re-fitting with new data) is supported\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_iris;\n\njulia> modelType = @load KMedoidsClusterer pkg = \"BetaML\" verbosity=0\nBetaML.Clustering.KMedoidsClusterer\n\njulia> model = modelType()\nKMedoidsClusterer(\n n_classes = 3, \n dist = BetaML.Clustering.var\"#39#41\"(), \n initialisation_strategy = \"shuffle\", \n initial_representatives = nothing, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X);\n\njulia> fit!(mach);\n[ Info: Training machine(KMedoidsClusterer(n_classes = 3, …), …).\n\njulia> classes_est = predict(mach, X);\n\njulia> hcat(y,classes_est)\n150×2 CategoricalArrays.CategoricalArray{Union{Int64, String},2,UInt32}:\n \"setosa\" 3\n \"setosa\" 3\n \"setosa\" 3\n ⋮ \n \"virginica\" 1\n \"virginica\" 1\n \"virginica\" 2\n```\n""" -":name" = "KMedoidsClusterer" -":human_name" = "k medoids clusterer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:n_classes, :dist, :initialisation_strategy, :initial_representatives, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Function\", \"String\", \"Union{Nothing, Matrix{Float64}}\", \"Random.AbstractRNG\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJTransforms.TargetEncoder] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Table`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, ScientificTypesBase.Unknown}`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJTransforms" -":package_license" = "unknown" -":load_path" = "MLJTransforms.TargetEncoder" -":package_uuid" = 
"23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nTargetEncoder\n```\n\nA model type for constructing a target encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTargetEncoder = @load TargetEncoder pkg=MLJTransforms\n```\n\nDo `model = TargetEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TargetEncoder(features=...)`.\n\n`TargetEncoder` implements target encoding as defined in [1] to encode categorical variables into continuous ones using statistics from the target variable.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous` or `Count` for regression problems and `Multiclass` or `OrderedFactor` for classification problems; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `λ`: Shrinkage hyperparameter used to mix between posterior and prior statistics as described in [1]\n * `m`: An integer hyperparameter to compute shrinkage as described in [1]. If `m=:auto` then m will be computed using\n\nempirical Bayes estimation as described in [1]\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply target encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `task`: Whether the task is `Classification` or `Regression`\n * `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical feature. 
It maps each level in each categorical feature to a statistic computed over the target.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Define the target variable \ny = [\"c1\", \"c2\", \"c3\", \"c1\", \"c2\",]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\ny = coerce(y, Multiclass)\n\nencoder = TargetEncoder(ordered_factor = false, lambda = 1.0, m = 0,)\nmach = fit!(machine(encoder, X, y))\nXnew = transform(mach, X)\n\njulia > schema(Xnew)\n┌───────┬──────────────────┬─────────────────────────────────┐\n│ names │ scitypes │ types │\n├───────┼──────────────────┼─────────────────────────────────┤\n│ A_1 │ Continuous │ Float64 │\n│ A_2 │ Continuous │ Float64 │\n│ A_3 │ Continuous │ Float64 │\n│ B │ Continuous │ Float64 │\n│ C_1 │ Continuous │ Float64 │\n│ C_2 │ Continuous │ Float64 │\n│ C_3 │ Continuous │ Float64 │\n│ D_1 │ Continuous │ Float64 │\n│ D_2 │ Continuous │ Float64 │\n│ D_3 │ Continuous │ Float64 │\n│ E │ OrderedFactor{5} │ CategoricalValue{Int64, UInt32} │\n└───────┴──────────────────┴─────────────────────────────────┘\n```\n\n# Reference\n\n[1] Micci-Barreca, Daniele. “A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems” SIGKDD Explor. Newsl. 
3, 1 (July 2001), 27–32.\n\nSee also [`OneHotEncoder`](@ref)\n""" -":name" = "TargetEncoder" -":human_name" = "target encoder" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :lambda, :m)`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Real\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":target_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" -[MLJTransforms.MissingnessEncoder] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Table`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJTransforms" -":package_license" = "unknown" -":load_path" = "MLJTransforms.MissingnessEncoder" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +[CatBoost.CatBoostRegressor] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, PythonCall.Core.Py, String}\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Core.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Core.Py}\")`" +":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "CatBoost.MLJCatBoostInterface.CatBoostRegressor" +":hyperparameters" = "`(:iterations, :learning_rate, :depth, :l2_leaf_reg, :model_size_reg, :rsm, :loss_function, :border_count, :feature_border_type, :per_float_feature_quantization, :input_borders, :output_borders, :fold_permutation_block, :nan_mode, :counter_calc_method, :leaf_estimation_iterations, :leaf_estimation_method, :thread_count, :random_seed, :metric_period, :ctr_leaf_count_limit, :store_all_simple_ctr, :max_ctr_complexity, :has_time, :allow_const_label, :target_border, :one_hot_max_size, :random_strength, :custom_metric, :bagging_temperature, :fold_len_multiplier, :used_ram_limit, :gpu_ram_part, :pinned_memory_size, :allow_writing_files, :approx_on_full_history, :boosting_type, :simple_ctr, :combinations_ctr, :per_feature_ctr, :ctr_target_border_count, :task_type, :devices, :bootstrap_type, :subsample, :sampling_frequency, :sampling_unit, :gpu_cat_features_storage, :data_partition, :early_stopping_rounds, :grow_policy, :min_data_in_leaf, :max_leaves, :leaf_estimation_backtracking, :feature_weights, :penalties_coefficient, :model_shrink_rate, :model_shrink_mode, :langevin, :diffusion_temperature, :posterior_sampling, :boost_from_average, :text_processing)`" +":is_pure_julia" = "`false`" +":human_name" = "CatBoost regressor" +":is_supervised" = "`true`" +":iteration_parameter" = ":iterations" +":docstring" = """```\nCatBoostRegressor\n```\n\nA model type for constructing a CatBoost regressor, based on [CatBoost.jl](https://github.com/JuliaAI/CatBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCatBoostRegressor = @load CatBoostRegressor pkg=CatBoost\n```\n\nDo `model = CatBoostRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CatBoostRegressor(iterations=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, `Finite`, `Textual`; check column scitypes with `schema(X)`. `Textual` columns will be passed to catboost as `text_features`, `Multiclass` columns will be passed to catboost as `cat_features`, and `OrderedFactor` columns will be converted to integers.\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nFor more details on the catboost hyper-parameters, see the Python docs: https://catboost.ai/en/docs/concepts/python-reference_catboostclassifier#parameters
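\n\nFor example, a few of the hyper-parameters listed above can be overridden at construction (a minimal sketch; the values shown are purely illustrative, not recommendations):\n\n```\nmodel = CatBoostRegressor(iterations = 100, learning_rate = 0.1, depth = 6)\n```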
\n\n# Operations\n\n * `predict(mach, Xnew)`: predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Accessor functions\n\n * `feature_importances(mach)`: return vector of feature importances, in the form of `feature::Symbol => importance::Real` pairs\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `model`: The Python CatBoostRegressor model\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `feature_importances`: Vector{Pair{Symbol, Float64}} of feature importances\n\n# Examples\n\n```\nusing CatBoost.MLJCatBoostInterface\nusing MLJ\n\nX = (\n duration = [1.5, 4.1, 5.0, 6.7], \n n_phone_calls = [4, 5, 6, 7], \n department = coerce([\"acc\", \"ops\", \"acc\", \"ops\"], Multiclass), \n)\ny = [2.0, 4.0, 6.0, 7.0]\n\nmodel = CatBoostRegressor(iterations=5)\nmach = machine(model, X, y)\nfit!(mach)\npreds = predict(mach, X)\n```\n\nSee also [catboost](https://github.com/catboost/catboost) and the unwrapped model type [`CatBoost.CatBoostRegressor`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/CatBoost.jl" +":package_name" = "CatBoost" +":name" = "CatBoostRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMissingnessEncoder\n```\n\nA model type for constructing a missingness encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMissingnessEncoder = @load MissingnessEncoder pkg=MLJTransforms\n```\n\nDo `model = MissingnessEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MissingnessEncoder(features=...)`.\n\n`MissingnessEncoder` maps any missing level of a categorical feature into a new level (e.g., \"Missing\"). By this, missingness will be treated as a new level by any subsequent model. This assumes that the categorical features have raw types that are in `Char`, `AbstractString`, and `Number`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `label_for_missing::Dict{<:Type, <:Any}()= Dict( AbstractString => \"missing\", Char => 'm', )`: A\n\ndictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and where each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then missing values will be replaced with `\"missing\"` and if the raw type subtypes `Char` then the new value is `'m'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `label_for_missing_given_feature`: A dictionary that for each column, maps `missing` into some value according to `label_for_missing`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define a table with missing values\nXm = (\n A = categorical([\"Ben\", \"John\", missing, missing, \"Mary\", \"John\", missing]),\n B = [1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C= categorical([7, 5, missing, missing, 10, 0, missing]),\n D = [23, 23, 44, 66, 14, 23, 11],\n E = categorical([missing, 'g', 'r', missing, 'r', 'g', 'p'])\n)\n\nencoder = MissingnessEncoder()\nmach = fit!(machine(encoder, Xm))\nXnew = transform(mach, Xm)\n\njulia> Xnew\n(A = [\"Ben\", \"John\", \"missing\", \"missing\", \"Mary\", \"John\", \"missing\"],\n B = Union{Missing, Float64}[1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = [7, 5, -1, -1, 10, 0, -1],\n D = [23, 23, 44, 66, 14, 23, 11],\n E = ['m', 'g', 'r', 'm', 'r', 'g', 'p'],)\n\n```\n\nSee also [`CardinalityReducer`](@ref)\n""" -":name" = "MissingnessEncoder" -":human_name" = "missingness encoder" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :label_for_missing)`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Dict{T} where T<:Type\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":update"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = 
"`AbstractVector{<:ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJTransforms.ContrastEncoder] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Table`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJTransforms" -":package_license" = "unknown" -":load_path" = "MLJTransforms.ContrastEncoder" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +[CatBoost.CatBoostClassifier] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Int64}\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Core.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Core.Py}\")`" +":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, 
AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "CatBoost.MLJCatBoostInterface.CatBoostClassifier" +":hyperparameters" = "`(:iterations, :learning_rate, :depth, :l2_leaf_reg, :model_size_reg, :rsm, :loss_function, :border_count, :feature_border_type, :per_float_feature_quantization, :input_borders, :output_borders, :fold_permutation_block, :nan_mode, :counter_calc_method, :leaf_estimation_iterations, :leaf_estimation_method, :thread_count, :random_seed, :metric_period, :ctr_leaf_count_limit, :store_all_simple_ctr, :max_ctr_complexity, :has_time, :allow_const_label, :target_border, :class_weights, :auto_class_weights, :one_hot_max_size, :random_strength, :bagging_temperature, :fold_len_multiplier, :used_ram_limit, :gpu_ram_part, :pinned_memory_size, :allow_writing_files, :approx_on_full_history, :boosting_type, :simple_ctr, :combinations_ctr, :per_feature_ctr, :task_type, :devices, :bootstrap_type, :subsample, :sampling_frequency, :sampling_unit, :gpu_cat_features_storage, :data_partition, :early_stopping_rounds, :grow_policy, :min_data_in_leaf, :max_leaves, :leaf_estimation_backtracking, :feature_weights, :penalties_coefficient, :model_shrink_rate, :model_shrink_mode, :langevin, :diffusion_temperature, :posterior_sampling, :boost_from_average, :text_processing)`" +":is_pure_julia" = "`false`" +":human_name" = "CatBoost classifier" +":is_supervised" = "`true`" +":iteration_parameter" = ":iterations" +":docstring" = """```\nCatBoostClassifier\n```\n\nA model type for constructing a CatBoost classifier, based on [CatBoost.jl](https://github.com/JuliaAI/CatBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCatBoostClassifier = @load CatBoostClassifier pkg=CatBoost\n```\n\nDo `model = CatBoostClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CatBoostClassifier(iterations=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, `Finite`, `Textual`; check column scitypes with `schema(X)`. `Textual` columns will be passed to catboost as `text_features`, `Multiclass` columns will be passed to catboost as `cat_features`, and `OrderedFactor` columns will be converted to integers.\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nFor more details on the catboost hyper-parameters, see the Python docs: https://catboost.ai/en/docs/concepts/python-reference_catboostclassifier#parameters
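\n\nAs with the regressor above, individual hyper-parameters can be overridden at construction (a minimal sketch; the values shown are illustrative only):\n\n```\nmodel = CatBoostClassifier(iterations = 100, learning_rate = 0.05, depth = 4)\n```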
\n\n# Operations\n\n * `predict(mach, Xnew)`: probabilistic predictions of the target given new features `Xnew` having the same scitype as `X` above.\n * `predict_mode(mach, Xnew)`: returns the modes of the predictions above.\n\n# Accessor functions\n\n * `feature_importances(mach)`: return vector of feature importances, in the form of `feature::Symbol => importance::Real` pairs\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `model`: The Python CatBoostClassifier model\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `feature_importances`: Vector{Pair{Symbol, Float64}} of feature importances\n\n# Examples\n\n```\nusing CatBoost.MLJCatBoostInterface\nusing MLJ\n\nX = (\n duration = [1.5, 4.1, 5.0, 6.7], \n n_phone_calls = [4, 5, 6, 7], \n department = coerce([\"acc\", \"ops\", \"acc\", \"ops\"], Multiclass), \n)\ny = coerce([0, 0, 1, 1], Multiclass)\n\nmodel = CatBoostClassifier(iterations=5)\nmach = machine(model, X, y)\nfit!(mach)\nprobs = predict(mach, X)\npreds = predict_mode(mach, X)\n```\n\nSee also [catboost](https://github.com/catboost/catboost) and the unwrapped model type [`CatBoost.CatBoostClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/CatBoost.jl" +":package_name" = "CatBoost" +":name" = "CatBoostClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nContrastEncoder\n```\n\nA model type for constructing a contrast encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContrastEncoder = @load ContrastEncoder pkg=MLJTransforms\n```\n\nDo `model = ContrastEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContrastEncoder(features=...)`.\n\n`ContrastEncoder` implements the following contrast encoding methods for categorical features: dummy, sum, backward/forward difference, and Helmert coding. More generally, users can specify a custom contrast or hypothesis matrix, and each feature can be encoded using a different method.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * `mode=:dummy`: The type of encoding to use. 
Can be one of `:contrast`, `:dummy`, `:sum`, `:backward_diff`, `:forward_diff`, `:helmert` or `:hypothesis`.\n\nIf `ignore=false` (features to be encoded are listed explictly in `features`), then this can be a vector of the same length as `features` to specify a different contrast encoding scheme for each feature\n\n * `buildmatrix=nothing`: A function or other callable with signature `buildmatrix(colname, k)`,\n\nwhere `colname` is the name of the feature levels and `k` is it's length, and which returns contrast or hypothesis matrix with row/column ordering consistent with the ordering of `levels(col)`. Only relevant if `mode` is `:contrast` or `:hypothesis`.\n\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply contrast encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vector_given_value_given_feature`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical dataset\nX = (\n name = categorical([\"Ben\", \"John\", \"Mary\", \"John\"]),\n height = [1.85, 1.67, 1.5, 1.67],\n favnum = categorical([7, 5, 10, 1]),\n age = [23, 23, 14, 23],\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = ContrastEncoder(\n features = [:name, :favnum],\n ignore = false, \n mode = [:dummy, :helmert],\n)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (name_John = [1.0, 0.0, 0.0, 0.0],\n name_Mary = [0.0, 1.0, 0.0, 1.0],\n height = [1.85, 1.67, 1.5, 1.67],\n favnum_5 = [0.0, 1.0, 0.0, -1.0],\n favnum_7 = [2.0, -1.0, 0.0, -1.0],\n favnum_10 = [-1.0, -1.0, 3.0, -1.0],\n age = [23, 23, 14, 23],)\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" -":name" = "ContrastEncoder" -":human_name" = "contrast encoder" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:features, :ignore, :mode, :buildmatrix, :ordered_factor)`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Symbol, AbstractVector{Symbol}}\", \"Any\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mode", ":reformat", ":selectrows", ":update"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, 
AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJTransforms.FrequencyEncoder] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Table`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJTransforms" -":package_license" = "unknown" -":load_path" = "MLJTransforms.FrequencyEncoder" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +[NearestNeighborModels.KNNClassifier] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "NearestNeighborModels.KNNClassifier" +":hyperparameters" = "`(:K, :algorithm, :metric, :leafsize, :reorder, :weights)`" +":is_pure_julia" = "`true`" +":human_name" = "K-nearest neighbor classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nKNNClassifier\n```\n\nA model type for constructing a K-nearest neighbor classifier, based on [NearestNeighborModels.jl](https://github.com/JuliaAI/NearestNeighborModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKNNClassifier = @load KNNClassifier pkg=NearestNeighborModels\n```\n\nDo `model = KNNClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KNNClassifier(K=...)`.\n\nKNNClassifier implements the [K-Nearest Neighbors classifier](https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm), which is a non-parametric algorithm that predicts a discrete class distribution associated with a new point by taking a vote over the classes of the k-nearest points. Each neighbor vote is assigned a weight based on the proximity of the neighbor point to the test point according to a specified distance metric.\n\nFor more information about the weighting kernels, see the paper by Geler et al., [Comparison of different weighting schemes for the kNN classifier on time-series data](https://perun.pmf.uns.ac.rs/radovanovic/publications/2016-kais-knn-weighting.pdf). 
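\n\nAs a rough sketch of what a distance-based weighting kernel computes (illustrative only; in practice this is handled internally by the chosen `KNNKernel`, for example `NearestNeighborModels.Inverse()`):\n\n```\ndists = [0.5, 1.0, 2.0] # distances to the k = 3 nearest neighbors\nw = 1 ./ dists          # inverse-distance weights\nw ./= sum(w)            # normalized vote weights, here ≈ [0.57, 0.29, 0.14]\n```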
\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `<:Finite` (`<:Multiclass` or `<:OrderedFactor` will do); check the scitype with `scitype(y)`\n * `w` is the observation weights, which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. This is different from the `weights` kernel, which is a model hyperparameter; see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `K::Int=5` : number of neighbors\n * `algorithm::Symbol = :kdtree` : one of `(:kdtree, :brutetree, :balltree)`\n * `metric::Metric = Euclidean()` : any `Metric` from [Distances.jl](https://github.com/JuliaStats/Distances.jl) for the distance between points. For `algorithm = :kdtree`, only metrics which are instances of `Distances.UnionMinkowskiMetric` are supported.\n * `leafsize::Int = 10` : determines the number of points at which to stop splitting the tree. This option is ignored and always taken as `0` for `algorithm = :brutetree`, since `brutetree` isn't actually a tree.\n * `reorder::Bool = true` : if `true` then points which are close in distance are placed close in memory. In this case, a copy of the original data will be made so that the original data is left unmodified. Setting this to `true` can significantly improve performance of the specified `algorithm` (except `:brutetree`). This option is ignored and always taken as `false` for `algorithm = :brutetree`.\n * `weights::KNNKernel=Uniform()` : kernel used in assigning weights to the k-nearest neighbors for each observation. An instance of one of the types in `list_kernels()`. User-defined weighting functions can be passed by wrapping the function in a [`UserDefinedKernel`](@ref) kernel (do `?NearestNeighborModels.UserDefinedKernel` for more info). If observation weights `w` are passed during machine construction then the weight assigned to each neighbor vote is the product of the kernel generated weight for that neighbor and the corresponding observation weight.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: An instance of either `KDTree`, `BruteTree` or `BallTree` depending on the value of the `algorithm` hyperparameter (See hyper-parameters section above). These are data structures that store the training data with a view to making nearest neighbor searches on test data points quicker.\n\n# Examples\n\n```\nusing MLJ\nKNNClassifier = @load KNNClassifier pkg=NearestNeighborModels\nX, y = @load_crabs; # a table and a vector from the crabs dataset\n# view possible kernels\nNearestNeighborModels.list_kernels()\n# KNNClassifier instantiation\nmodel = KNNClassifier(weights = NearestNeighborModels.Inverse())\nmach = machine(model, X, y) |> fit! 
# wrap model and required data in an MLJ machine and fit\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`MultitargetKNNClassifier`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/NearestNeighborModels.jl" +":package_name" = "NearestNeighborModels" +":name" = "KNNClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nFrequencyEncoder\n```\n\nA model type for constructing a frequency encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFrequencyEncoder = @load FrequencyEncoder pkg=MLJTransforms\n```\n\nDo `model = FrequencyEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FrequencyEncoder(features=...)`.\n\n`FrequencyEncoder` implements frequency encoding which replaces the categorical values in the specified categorical features with their (normalized or raw) frequencies of occurrence in the dataset. \n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.\n * `output_type=Float32`: The type of the output values. The default is `Float32`, but you can set it to `Float64` or any other type that can hold the frequency values.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply frequency encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. 
Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = FrequencyEncoder(ordered_factor = false, normalize=true)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 2, 2],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [4, 4, 4, 1, 4],\n D = [3, 2, 3, 2, 3],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" -":name" = "FrequencyEncoder" -":human_name" = "frequency encoder" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :normalize, :output_type)`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Bool\", \"Type\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJTransforms.CardinalityReducer] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Table`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJTransforms" -":package_license" = "unknown" -":load_path" = "MLJTransforms.CardinalityReducer" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +[NearestNeighborModels.MultitargetKNNClassifier] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\", \"Type{<:Union{AbstractDict{<:AbstractString, 
<:AbstractVector}, AbstractDict{Symbol, <:AbstractVector}, NamedTuple{names, T} where {N, names, T<:NTuple{N, AbstractVector}}}}\")`"
+":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
+":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.Probabilistic`"
+":package_license" = "MIT"
+":prediction_type" = ":probabilistic"
+":load_path" = "NearestNeighborModels.MultitargetKNNClassifier"
+":hyperparameters" = "`(:K, :algorithm, :metric, :leafsize, :reorder, :weights, :output_type)`"
+":is_pure_julia" = "`true`"
+":human_name" = "multitarget K-nearest neighbor classifier"
+":is_supervised" = "`true`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nMultitargetKNNClassifier\n```\n\nA model type for constructing a multitarget K-nearest neighbor classifier, based on [NearestNeighborModels.jl](https://github.com/JuliaAI/NearestNeighborModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetKNNClassifier = @load MultitargetKNNClassifier pkg=NearestNeighborModels\n```\n\nDo `model = MultitargetKNNClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetKNNClassifier(K=...)`.\n\nMulti-target K-Nearest Neighbors Classifier (MultitargetKNNClassifier) is a variation of [`KNNClassifier`](@ref) that assumes the target variable is vector-valued with `Multiclass` or `OrderedFactor` components. (Target data must be presented as a table, however.)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is either `<:Finite` (`<:Multiclass` or `<:OrderedFactor` will do); check the column scitypes with `schema(y)`. Each column of `y` is assumed to belong to a common categorical pool.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. This is different from the `weights` kernel, which is a model hyperparameter; see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `K::Int=5` : number of neighbors\n * `algorithm::Symbol = :kdtree` : one of `(:kdtree, :brutetree, :balltree)`\n * `metric::Metric = Euclidean()` : any `Metric` from [Distances.jl](https://github.com/JuliaStats/Distances.jl) for the distance between points. For `algorithm = :kdtree` only metrics which are instances of `Distances.UnionMinkowskiMetric` are supported.\n * `leafsize::Int = 10` : determines the number of points at which to stop splitting the tree. 
This option is ignored and always taken as `0` for `algorithm = :brutetree`, since `brutetree` isn't actually a tree.\n * `reorder::Bool = true` : if `true` then points which are close in distance are placed close in memory. In this case, a copy of the original data will be made so that the original data is left unmodified. Setting this to `true` can significantly improve performance of the specified `algorithm` (except `:brutetree`). This option is ignored and always taken as `false` for `algorithm = :brutetree`.\n * `weights::KNNKernel=Uniform()` : kernel used in assigning weights to the k-nearest neighbors for each observation. An instance of one of the types in `list_kernels()`. User-defined weighting functions can be passed by wrapping the function in a [`UserDefinedKernel`](@ref) kernel (do `?NearestNeighborModels.UserDefinedKernel` for more info). If observation weights `w` are passed during machine construction then the weight assigned to each neighbor vote is the product of the kernel generated weight for that neighbor and the corresponding observation weight.\n\n * `output_type::Type{<:MultiUnivariateFinite}=DictTable` : One of (`ColumnTable`, `DictTable`). The type of table type to use for predictions. Setting to `ColumnTable` might improve performance for narrow tables while setting to `DictTable` improves performance for wide tables.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. Predictions are either a `ColumnTable` or `DictTable` of `UnivariateFiniteVector` columns depending on the value set for the `output_type` parameter discussed above. The probabilistic predictions are uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of each column of the table of probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: An instance of either `KDTree`, `BruteTree` or `BallTree` depending on the value of the `algorithm` hyperparameter (See hyper-parameters section above). 
These are data structures that store the training data with a view to making nearest neighbor searches on test data points quicker.\n\n# Examples\n\n```\nusing MLJ, StableRNGs\n\n# set rng for reproducibility\nrng = StableRNG(10)\n\n# Dataset generation\nn, p = 10, 3\nX = table(randn(rng, n, p)) # feature table\nfruit, color = categorical([\"apple\", \"orange\"]), categorical([\"blue\", \"green\"])\ny = [(fruit = rand(rng, fruit), color = rand(rng, color)) for _ in 1:n] # target_table\n# Each column in y has a common categorical pool as expected\nselectcols(y, :fruit) # categorical array\nselectcols(y, :color) # categorical array\n\n# Load MultitargetKNNClassifier\nMultitargetKNNClassifier = @load MultitargetKNNClassifier pkg=NearestNeighborModels\n\n# view possible kernels\nNearestNeighborModels.list_kernels()\n\n# MultitargetKNNClassifier instantiation\nmodel = MultitargetKNNClassifier(K=3, weights = NearestNeighborModels.Inverse())\n\n# wrap model and required data in an MLJ machine and fit\nmach = machine(model, X, y) |> fit!\n\n# predict\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`KNNClassifier`](@ref)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":package_url" = "https://github.com/JuliaAI/NearestNeighborModels.jl"
+":package_name" = "NearestNeighborModels"
+":name" = "MultitargetKNNClassifier"
+":target_in_fit" = "`true`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = """```\nCardinalityReducer\n```\n\nA model type for constructing a cardinality reducer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCardinalityReducer = @load CardinalityReducer pkg=MLJTransforms\n```\n\nDo `model = CardinalityReducer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CardinalityReducer(features=...)`.\n\n`CardinalityReducer` maps any level of a categorical feature that occurs with frequency < `min_frequency` into a new level (e.g., \"Other\"). This is useful when some categorical features have high cardinality and many levels are infrequent. This assumes that the categorical features have raw types that are in `Union{AbstractString, Char, Number}`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `min_frequency::Real=3`: Any level of a categorical feature that occurs with frequency < `min_frequency` will be mapped to a new level. 
Could be\n\nan integer or a float which decides whether raw counts or normalized frequencies are used.\n\n * `label_for_infrequent::Dict{<:Type, <:Any}()= Dict( AbstractString => \"Other\", Char => 'O', )`: A\n\ndictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then the new value is `\"Other\"` and if the raw type subtypes `Char` then the new value is `'O'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `new_cat_given_col_val`: A dictionary that maps each level in a categorical feature to a new level (either itself or the new level specified in `label_for_infrequent`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define categorical features\nA = [ [\"a\" for i in 1:100]..., \"b\", \"b\", \"b\", \"c\", \"d\"]\nB = [ [0 for i in 1:100]..., 1, 2, 3, 4, 4]\n\n# Combine into a named tuple\nX = (A = A, B = B)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Multiclass\n)\n\nencoder = CardinalityReducer(ordered_factor = false, min_frequency=3)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> proportionmap(Xnew.A)\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, Float64} with 3 entries:\n \"Other\" => 0.0190476\n \"b\" => 0.0285714\n \"a\" => 0.952381\n\njulia> proportionmap(Xnew.B)\nDict{CategoricalArrays.CategoricalValue{Int64, UInt32}, Float64} with 2 entries:\n 0 => 0.952381\n -1 => 0.047619\n```\n\nSee also [`FrequencyEncoder`](@ref)\n""" -":name" = "CardinalityReducer" -":human_name" = "cardinality reducer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :min_frequency, :label_for_infrequent)`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Dict{T} where T<:Type\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mode"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJTransforms.OrdinalEncoder] 
-":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Table`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJTransforms" -":package_license" = "unknown" -":load_path" = "MLJTransforms.OrdinalEncoder" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +[NearestNeighborModels.MultitargetKNNRegressor] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "NearestNeighborModels.MultitargetKNNRegressor" +":hyperparameters" = "`(:K, :algorithm, :metric, :leafsize, :reorder, :weights)`" +":is_pure_julia" = "`true`" +":human_name" = "multitarget K-nearest neighbor regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultitargetKNNRegressor\n```\n\nA model type for constructing a multitarget K-nearest neighbor regressor, based on [NearestNeighborModels.jl](https://github.com/JuliaAI/NearestNeighborModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetKNNRegressor = @load MultitargetKNNRegressor pkg=NearestNeighborModels\n```\n\nDo `model = MultitargetKNNRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetKNNRegressor(K=...)`.\n\nMulti-target K-Nearest Neighbors regressor (MultitargetKNNRegressor) is a variation of [`KNNRegressor`](@ref) that assumes the target variable is vector-valued with `Continuous` components. (Target data must be presented as a table, however.)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check column scitypes with `schema(y)`.\n * `w` is the observation weights which can either be `nothing`(default) or an `AbstractVector` whoose element scitype is `Count` or `Continuous`. 
This is different from the `weights` kernel, which is a model hyperparameter; see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `K::Int=5` : number of neighbors\n * `algorithm::Symbol = :kdtree` : one of `(:kdtree, :brutetree, :balltree)`\n * `metric::Metric = Euclidean()` : any `Metric` from [Distances.jl](https://github.com/JuliaStats/Distances.jl) for the distance between points. For `algorithm = :kdtree` only metrics which are instances of `Distances.UnionMinkowskiMetric` are supported.\n * `leafsize::Int = 10` : determines the number of points at which to stop splitting the tree. This option is ignored and always taken as `0` for `algorithm = :brutetree`, since `brutetree` isn't actually a tree.\n * `reorder::Bool = true` : if `true` then points which are close in distance are placed close in memory. In this case, a copy of the original data will be made so that the original data is left unmodified. Setting this to `true` can significantly improve performance of the specified `algorithm` (except `:brutetree`). This option is ignored and always taken as `false` for `algorithm = :brutetree`.\n * `weights::KNNKernel=Uniform()` : kernel used in assigning weights to the k-nearest neighbors for each observation. An instance of one of the types in `list_kernels()`. User-defined weighting functions can be passed by wrapping the function in a [`UserDefinedKernel`](@ref) kernel (do `?NearestNeighborModels.UserDefinedKernel` for more info). If observation weights `w` are passed during machine construction then the weight assigned to each neighbor vote is the product of the kernel generated weight for that neighbor and the corresponding observation weight.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: An instance of either `KDTree`, `BruteTree` or `BallTree` depending on the value of the `algorithm` hyperparameter (See hyper-parameters section above). These are data structures that store the training data with a view to making nearest neighbor searches on test data points quicker.\n\n# Examples\n\n```\nusing MLJ\n\n# Create Data\nX, y = make_regression(10, 5, n_targets=2)\n\n# load MultitargetKNNRegressor\nMultitargetKNNRegressor = @load MultitargetKNNRegressor pkg=NearestNeighborModels\n\n# view possible kernels\nNearestNeighborModels.list_kernels()\n\n# MultitargetKNNRegressor instantiation\nmodel = MultitargetKNNRegressor(weights = NearestNeighborModels.Inverse())\n\n# Wrap model and required data in an MLJ machine and fit.\nmach = machine(model, X, y) |> fit!
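\n\n# (illustrative addition, not from the original docstring) the fitted tree --\n# a KDTree under the default algorithm -- can be inspected via fitted_params:\nfitted_params(mach).tree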
\n\n# Predict\ny_hat = predict(mach, X)\n\n```\n\nSee also [`KNNRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/NearestNeighborModels.jl" +":package_name" = "NearestNeighborModels" +":name" = "MultitargetKNNRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nOrdinalEncoder\n```\n\nA model type for constructing a ordinal encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOrdinalEncoder = @load OrdinalEncoder pkg=MLJTransforms\n```\n\nDo `model = OrdinalEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OrdinalEncoder(features=...)`.\n\n`OrdinalEncoder` implements ordinal encoding which replaces the categorical values in the specified categorical features with integers (ordered arbitrarily). This will create an implicit ordering between categories which may not be a proper modelling assumption.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `output_type`: The numerical concrete type of the encoded features. Default is `Float32`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply ordinal encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. 
Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical features of X into an integer.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercion:\nschema(X)\n\nencoder = OrdinalEncoder(ordered_factor = false)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 3, 3],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [1, 1, 1, 2, 1],\n D = [2, 1, 2, 1, 2],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" -":name" = "OrdinalEncoder" -":human_name" = "ordinal encoder" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :output_type)`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Type\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" -[CatBoost.CatBoostRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +[NearestNeighborModels.KNNRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`"
":output_scitype" = "`ScientificTypesBase.Unknown`"
-":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.Deterministic`"
+":package_license" = "MIT"
+":prediction_type" = ":deterministic"
+":load_path" = "NearestNeighborModels.KNNRegressor"
+":hyperparameters" = "`(:K, :algorithm, :metric, :leafsize, :reorder, :weights)`"
+":is_pure_julia" = "`true`"
+":human_name" = "K-nearest neighbor regressor"
+":is_supervised" = "`true`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nKNNRegressor\n```\n\nA model type for constructing a K-nearest neighbor regressor, based on [NearestNeighborModels.jl](https://github.com/JuliaAI/NearestNeighborModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKNNRegressor = @load KNNRegressor pkg=NearestNeighborModels\n```\n\nDo `model = KNNRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KNNRegressor(K=...)`.\n\nKNNRegressor implements [K-Nearest Neighbors regressor](https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm), which is a non-parametric algorithm that predicts the response associated with a new point by taking a weighted average of the responses of the K-nearest points.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. This is different from the `weights` kernel, which is a model hyperparameter; see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `K::Int=5` : number of neighbors\n * `algorithm::Symbol = :kdtree` : one of `(:kdtree, :brutetree, :balltree)`\n * `metric::Metric = Euclidean()` : any `Metric` from [Distances.jl](https://github.com/JuliaStats/Distances.jl) for the distance between points. For `algorithm = :kdtree` only metrics which are instances of `Distances.UnionMinkowskiMetric` are supported.\n * `leafsize::Int = 10` : determines the number of points at which to stop splitting the tree. This option is ignored and always taken as `0` for `algorithm = :brutetree`, since `brutetree` isn't actually a tree.\n * `reorder::Bool = true` : if `true` then points which are close in distance are placed close in memory. 
In this case, a copy of the original data will be made so that the original data is left unmodified. Setting this to `true` can significantly improve performance of the specified `algorithm` (except `:brutetree`). This option is ignored and always taken as `false` for `algorithm = :brutetree`.\n * `weights::KNNKernel=Uniform()` : kernel used in assigning weights to the k-nearest neighbors for each observation. An instance of one of the types in `list_kernels()`. User-defined weighting functions can be passed by wrapping the function in a [`UserDefinedKernel`](@ref) kernel (do `?NearestNeighborModels.UserDefinedKernel` for more info). If observation weights `w` are passed during machine construction then the weight assigned to each neighbor vote is the product of the kernel generated weight for that neighbor and the corresponding observation weight.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: An instance of either `KDTree`, `BruteTree` or `BallTree` depending on the value of the `algorithm` hyperparameter (See hyper-parameters section above). These are data structures that store the training data with a view to making nearest neighbor searches on test data points quicker.\n\n# Examples\n\n```\nusing MLJ\nKNNRegressor = @load KNNRegressor pkg=NearestNeighborModels\nX, y = @load_boston; # loads the Boston house-price dataset from MLJBase\n# view possible kernels\nNearestNeighborModels.list_kernels()\nmodel = KNNRegressor(weights = NearestNeighborModels.Inverse()) #KNNRegressor instantiation\nmach = machine(model, X, y) |> fit! # wrap model and required data in an MLJ machine and fit\ny_hat = predict(mach, X)\n\n```\n\nSee also [`MultitargetKNNRegressor`](@ref)\n"""
":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":package_url" = "https://github.com/JuliaAI/NearestNeighborModels.jl"
+":package_name" = "NearestNeighborModels"
+":name" = "KNNRegressor"
":target_in_fit" = "`true`"
-":is_pure_julia" = "`false`"
-":package_name" = "CatBoost"
-":package_license" = "MIT"
-":load_path" = "CatBoost.MLJCatBoostInterface.CatBoostRegressor"
-":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12"
-":package_url" = "https://github.com/JuliaAI/CatBoost.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = """```\nCatBoostRegressor\n```\n\nA model type for constructing a CatBoost regressor, based on [CatBoost.jl](https://github.com/JuliaAI/CatBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCatBoostRegressor = @load CatBoostRegressor pkg=CatBoost\n```\n\nDo `model = CatBoostRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CatBoostRegressor(iterations=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, `Finite`, `Textual`; check column scitypes with `schema(X)`. 
`Textual` columns will be passed to catboost as `text_features`, `Multiclass` columns will be passed to catboost as `cat_features`, and `OrderedFactor` columns will be converted to integers.\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nMore details on the catboost hyperparameters, here are the Python docs: https://catboost.ai/en/docs/concepts/python-reference_catboostclassifier#parameters\n\n# Operations\n\n * `predict(mach, Xnew)`: probabilistic predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Accessor functions\n\n * `feature_importances(mach)`: return vector of feature importances, in the form of `feature::Symbol => importance::Real` pairs\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `model`: The Python CatBoostRegressor model\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `feature_importances`: Vector{Pair{Symbol, Float64}} of feature importances\n\n# Examples\n\n```\nusing CatBoost.MLJCatBoostInterface\nusing MLJ\n\nX = (\n duration = [1.5, 4.1, 5.0, 6.7], \n n_phone_calls = [4, 5, 6, 7], \n department = coerce([\"acc\", \"ops\", \"acc\", \"ops\"], Multiclass), \n)\ny = [2.0, 4.0, 6.0, 7.0]\n\nmodel = CatBoostRegressor(iterations=5)\nmach = machine(model, X, y)\nfit!(mach)\npreds = predict(mach, X)\n```\n\nSee also [catboost](https://github.com/catboost/catboost) and the unwrapped model type [`CatBoost.CatBoostRegressor`](@ref).\n""" -":name" = "CatBoostRegressor" -":human_name" = "CatBoost regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":update", ":feature_importances"] -":hyperparameters" = "`(:iterations, :learning_rate, :depth, :l2_leaf_reg, :model_size_reg, :rsm, :loss_function, :border_count, :feature_border_type, :per_float_feature_quantization, :input_borders, :output_borders, :fold_permutation_block, :nan_mode, :counter_calc_method, :leaf_estimation_iterations, :leaf_estimation_method, :thread_count, :random_seed, :metric_period, :ctr_leaf_count_limit, :store_all_simple_ctr, :max_ctr_complexity, :has_time, :allow_const_label, :target_border, :one_hot_max_size, :random_strength, :custom_metric, :bagging_temperature, :fold_len_multiplier, :used_ram_limit, :gpu_ram_part, :pinned_memory_size, :allow_writing_files, :approx_on_full_history, :boosting_type, :simple_ctr, :combinations_ctr, :per_feature_ctr, :ctr_target_border_count, :task_type, :devices, :bootstrap_type, :subsample, :sampling_frequency, :sampling_unit, :gpu_cat_features_storage, :data_partition, :early_stopping_rounds, :grow_policy, :min_data_in_leaf, :max_leaves, :leaf_estimation_backtracking, :feature_weights, :penalties_coefficient, :model_shrink_rate, :model_shrink_mode, :langevin, :diffusion_temperature, :posterior_sampling, :boost_from_average, :text_processing)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, 
Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, PythonCall.Core.Py, String}\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Core.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Core.Py}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":iterations" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`true`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[CatBoost.CatBoostClassifier] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +[MLJXGBoostInterface.XGBoostCount] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\", \"Union{Nothing, String}\")`" +":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Count}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJXGBoostInterface.XGBoostCount" +":hyperparameters" = "`(:test, :num_round, :booster, :disable_default_eval_metric, :eta, :num_parallel_tree, :gamma, :max_depth, :min_child_weight, :max_delta_step, :subsample, :colsample_bytree, :colsample_bylevel, :colsample_bynode, :lambda, :alpha, :tree_method, :sketch_eps, :scale_pos_weight, :updater, :refresh_leaf, :process_type, :grow_policy, :max_leaves, :max_bin, :predictor, :sample_type, :normalize_type, :rate_drop, :one_drop, :skip_drop, :feature_selector, :top_k, :tweedie_variance_power, :objective, :base_score, :early_stopping_rounds, :watchlist, :nthread, :importance_type, :seed, :validate_parameters, :eval_metric, :monotone_constraints)`" +":is_pure_julia" = "`false`" +":human_name" = "eXtreme Gradient Boosting Count Regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nXGBoostCount\n```\n\nA model type for constructing a eXtreme Gradient Boosting Count Regressor, based on [XGBoost.jl](https://github.com/dmlc/XGBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nXGBoostCount = @load XGBoostCount pkg=XGBoost\n```\n\nDo `model = XGBoostCount()` to construct an instance with default hyper-parameters. 
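For example, here is a minimal sketch (added for illustration; it assumes only MLJ and the XGBoost.jl interface, and uses the `num_round` and `max_depth` hyper-parameters listed in this entry):\n\n```julia\nusing MLJ\nXGBoostCount = @load XGBoostCount pkg=XGBoost\n\n# toy data: continuous features and a count-valued (integer) target\nX = table(randn(100, 3))\ny = rand(0:5, 100) # element scitype Count\n\nmodel = XGBoostCount(num_round=20, max_depth=3)\nmach = machine(model, X, y) |> fit!\ny_hat = predict(mach, X)\n```\n\n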
Provide keyword arguments to override hyper-parameter defaults, as in `XGBoostCount(test=...)`.\n\nUnivariate discrete regression using [xgboost](https://xgboost.readthedocs.io/en/stable/index.html).\n\n# Training data\n\nIn `MLJ` or `MLJBase`, bind an instance `model` to data with\n\n```julia\nm = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any Tables.jl-compatible table of input features whose columns have `Continuous` element scitype; check column scitypes with `schema(X)`.\n * `y`: an `AbstractVector` target whose element scitype is `Count`; check the scitype with `scitype(y)`.\n\nTrain using `fit!(m, rows=...)`.\n\n# Hyper-parameters\n\nSee https://xgboost.readthedocs.io/en/stable/parameter.html.\n"""
":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":package_url" = "https://github.com/dmlc/XGBoost.jl"
+":package_name" = "XGBoost"
+":name" = "XGBoostCount"
":target_in_fit" = "`true`"
-":is_pure_julia" = "`false`"
-":package_name" = "CatBoost"
-":package_license" = "MIT"
-":load_path" = "CatBoost.MLJCatBoostInterface.CatBoostClassifier"
-":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12"
-":package_url" = "https://github.com/JuliaAI/CatBoost.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = """```\nCatBoostClassifier\n```\n\nA model type for constructing a CatBoost classifier, based on [CatBoost.jl](https://github.com/JuliaAI/CatBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCatBoostClassifier = @load CatBoostClassifier pkg=CatBoost\n```\n\nDo `model = CatBoostClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CatBoostClassifier(iterations=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, `Finite`, `Textual`; check column scitypes with `schema(X)`. 
`Textual` columns will be passed to catboost as `text_features`, `Multiclass` columns will be passed to catboost as `cat_features`, and `OrderedFactor` columns will be converted to integers.\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nMore details on the catboost hyperparameters, here are the Python docs: https://catboost.ai/en/docs/concepts/python-reference_catboostclassifier#parameters\n\n# Operations\n\n * `predict(mach, Xnew)`: probabilistic predictions of the target given new features `Xnew` having the same scitype as `X` above.\n * `predict_mode(mach, Xnew)`: returns the mode of each of the prediction above.\n\n# Accessor functions\n\n * `feature_importances(mach)`: return vector of feature importances, in the form of `feature::Symbol => importance::Real` pairs\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `model`: The Python CatBoostClassifier model\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `feature_importances`: Vector{Pair{Symbol, Float64}} of feature importances\n\n# Examples\n\n```\nusing CatBoost.MLJCatBoostInterface\nusing MLJ\n\nX = (\n duration = [1.5, 4.1, 5.0, 6.7], \n n_phone_calls = [4, 5, 6, 7], \n department = coerce([\"acc\", \"ops\", \"acc\", \"ops\"], Multiclass), \n)\ny = coerce([0, 0, 1, 1], Multiclass)\n\nmodel = CatBoostClassifier(iterations=5)\nmach = machine(model, X, y)\nfit!(mach)\nprobs = predict(mach, X)\npreds = predict_mode(mach, X)\n```\n\nSee also [catboost](https://github.com/catboost/catboost) and the unwrapped model type [`CatBoost.CatBoostClassifier`](@ref).\n""" -":name" = "CatBoostClassifier" -":human_name" = "CatBoost classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mode", ":reformat", ":selectrows", ":update", ":feature_importances"] -":hyperparameters" = "`(:iterations, :learning_rate, :depth, :l2_leaf_reg, :model_size_reg, :rsm, :loss_function, :border_count, :feature_border_type, :per_float_feature_quantization, :input_borders, :output_borders, :fold_permutation_block, :nan_mode, :counter_calc_method, :leaf_estimation_iterations, :leaf_estimation_method, :thread_count, :random_seed, :metric_period, :ctr_leaf_count_limit, :store_all_simple_ctr, :max_ctr_complexity, :has_time, :allow_const_label, :target_border, :class_weights, :auto_class_weights, :one_hot_max_size, :random_strength, :bagging_temperature, :fold_len_multiplier, :used_ram_limit, :gpu_ram_part, :pinned_memory_size, :allow_writing_files, :approx_on_full_history, :boosting_type, :simple_ctr, :combinations_ctr, :per_feature_ctr, :task_type, :devices, :bootstrap_type, :subsample, :sampling_frequency, :sampling_unit, :gpu_cat_features_storage, :data_partition, :early_stopping_rounds, :grow_policy, :min_data_in_leaf, :max_leaves, :leaf_estimation_backtracking, :feature_weights, :penalties_coefficient, :model_shrink_rate, :model_shrink_mode, :langevin, :diffusion_temperature, :posterior_sampling, :boost_from_average, :text_processing)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", 
\"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Int64}\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Core.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Core.Py}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":iterations" +":implemented_methods" = [":clean!"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Count}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Count}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[NearestNeighborModels.KNNClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[MLJXGBoostInterface.XGBoostRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\", \"Union{Nothing, String}\")`" +":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJXGBoostInterface.XGBoostRegressor" +":hyperparameters" = "`(:test, :num_round, :booster, :disable_default_eval_metric, :eta, :num_parallel_tree, :gamma, :max_depth, :min_child_weight, :max_delta_step, :subsample, :colsample_bytree, :colsample_bylevel, :colsample_bynode, :lambda, :alpha, :tree_method, :sketch_eps, :scale_pos_weight, :updater, :refresh_leaf, :process_type, :grow_policy, :max_leaves, :max_bin, :predictor, :sample_type, :normalize_type, :rate_drop, :one_drop, :skip_drop, :feature_selector, :top_k, :tweedie_variance_power, :objective, :base_score, :early_stopping_rounds, :watchlist, :nthread, :importance_type, :seed, :validate_parameters, :eval_metric, :monotone_constraints)`" +":is_pure_julia" = "`false`" +":human_name" = "eXtreme Gradient Boosting Regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nXGBoostRegressor\n```\n\nA model type for constructing a eXtreme Gradient Boosting Regressor, based on [XGBoost.jl](https://github.com/dmlc/XGBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nXGBoostRegressor = @load XGBoostRegressor pkg=XGBoost\n```\n\nDo `model = XGBoostRegressor()` to construct an instance with default hyper-parameters. 
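For example, a minimal sketch (added for illustration, with synthetic toy data; `make_regression` is used the same way elsewhere in this registry):\n\n```julia\nusing MLJ\nXGBoostRegressor = @load XGBoostRegressor pkg=XGBoost\n\n# synthetic continuous regression data: 100 rows, 4 features\nX, y = make_regression(100, 4)\n\nmodel = XGBoostRegressor(num_round=50, eta=0.1, max_depth=4)\nmach = machine(model, X, y) |> fit!\ny_hat = predict(mach, X)\n```\n\n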
Provide keyword arguments to override hyper-parameter defaults, as in `XGBoostRegressor(test=...)`.\n\nUnivariate continuous regression using [xgboost](https://xgboost.readthedocs.io/en/stable/index.html).\n\n# Training data\n\nIn `MLJ` or `MLJBase`, bind an instance `model` to data with\n\n```julia\nm = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features whose columns have `Continuous` element scitype; check column scitypes with `schema(X)`.\n * `y`: is an `AbstractVector` target with `Continuous` elements; check the scitype with `scitype(y)`.\n\nTrain using `fit!(m, rows=...)`.\n\n# Hyper-parameters\n\nSee https://xgboost.readthedocs.io/en/stable/parameter.html.\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/dmlc/XGBoost.jl" +":package_name" = "XGBoost" +":name" = "XGBoostRegressor" ":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "NearestNeighborModels" -":package_license" = "MIT" -":load_path" = "NearestNeighborModels.KNNClassifier" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaAI/NearestNeighborModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nKNNClassifier\n```\n\nA model type for constructing a K-nearest neighbor classifier, based on [NearestNeighborModels.jl](https://github.com/JuliaAI/NearestNeighborModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKNNClassifier = @load KNNClassifier pkg=NearestNeighborModels\n```\n\nDo `model = KNNClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KNNClassifier(K=...)`.\n\nKNNClassifier implements [K-Nearest Neighbors classifier](https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm) which is non-parametric algorithm that predicts a discrete class distribution associated with a new point by taking a vote over the classes of the k-nearest points. Each neighbor vote is assigned a weight based on proximity of the neighbor point to the test point according to a specified distance metric.\n\nFor more information about the weighting kernels, see the paper by Geler et.al [Comparison of different weighting schemes for the kNN classifier on time-series data](https://perun.pmf.uns.ac.rs/radovanovic/publications/2016-kais-knn-weighting.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `<:Finite` (`<:Multiclass` or `<:OrderedFactor` will do); check the scitype with `scitype(y)`\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. 
This is different from `weights` kernel which is a model hyperparameter, see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `K::Int=5` : number of neighbors\n * `algorithm::Symbol = :kdtree` : one of `(:kdtree, :brutetree, :balltree)`\n * `metric::Metric = Euclidean()` : any `Metric` from [Distances.jl](https://github.com/JuliaStats/Distances.jl) for the distance between points. For `algorithm = :kdtree` only metrics which are instances of `Distances.UnionMinkowskiMetric` are supported.\n * `leafsize::Int = algorithm == 10` : determines the number of points at which to stop splitting the tree. This option is ignored and always taken as `0` for `algorithm = :brutetree`, since `brutetree` isn't actually a tree.\n * `reorder::Bool = true` : if `true` then points which are close in distance are placed close in memory. In this case, a copy of the original data will be made so that the original data is left unmodified. Setting this to `true` can significantly improve performance of the specified `algorithm` (except `:brutetree`). This option is ignored and always taken as `false` for `algorithm = :brutetree`.\n * `weights::KNNKernel=Uniform()` : kernel used in assigning weights to the k-nearest neighbors for each observation. An instance of one of the types in `list_kernels()`. User-defined weighting functions can be passed by wrapping the function in a [`UserDefinedKernel`](@ref) kernel (do `?NearestNeighborModels.UserDefinedKernel` for more info). If observation weights `w` are passed during machine construction then the weight assigned to each neighbor vote is the product of the kernel generated weight for that neighbor and the corresponding observation weight.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: An instance of either `KDTree`, `BruteTree` or `BallTree` depending on the value of the `algorithm` hyperparameter (See hyper-parameters section above). These are data structures that stores the training data with the view of making quicker nearest neighbor searches on test data points.\n\n# Examples\n\n```\nusing MLJ\nKNNClassifier = @load KNNClassifier pkg=NearestNeighborModels\nX, y = @load_crabs; # a table and a vector from the crabs dataset\n# view possible kernels\nNearestNeighborModels.list_kernels()\n# KNNClassifier instantiation\nmodel = KNNClassifier(weights = NearestNeighborModels.Inverse())\nmach = machine(model, X, y) |> fit! 
# wrap model and required data in an MLJ machine and fit\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`MultitargetKNNClassifier`](@ref)\n""" -":name" = "KNNClassifier" -":human_name" = "K-nearest neighbor classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:K, :algorithm, :metric, :leafsize, :reorder, :weights)`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":clean!"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`true`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[NearestNeighborModels.MultitargetKNNClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[MLJXGBoostInterface.XGBoostClassifier] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\", \"Union{Nothing, String}\")`" +":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}}, 
Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "unknown" +":prediction_type" = ":probabilistic" +":load_path" = "MLJXGBoostInterface.XGBoostClassifier" +":hyperparameters" = "`(:test, :num_round, :booster, :disable_default_eval_metric, :eta, :num_parallel_tree, :gamma, :max_depth, :min_child_weight, :max_delta_step, :subsample, :colsample_bytree, :colsample_bylevel, :colsample_bynode, :lambda, :alpha, :tree_method, :sketch_eps, :scale_pos_weight, :updater, :refresh_leaf, :process_type, :grow_policy, :max_leaves, :max_bin, :predictor, :sample_type, :normalize_type, :rate_drop, :one_drop, :skip_drop, :feature_selector, :top_k, :tweedie_variance_power, :objective, :base_score, :early_stopping_rounds, :watchlist, :nthread, :importance_type, :seed, :validate_parameters, :eval_metric, :monotone_constraints)`" +":is_pure_julia" = "`false`" +":human_name" = "eXtreme Gradient Boosting Classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nXGBoostClassifier\n```\n\nA model type for constructing a eXtreme Gradient Boosting Classifier, based on [XGBoost.jl](https://github.com/dmlc/XGBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nXGBoostClassifier = @load XGBoostClassifier pkg=XGBoost\n```\n\nDo `model = XGBoostClassifier()` to construct an instance with default hyper-parameters. 
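By way of illustration, traits recorded in entries like the XGBoostClassifier one above can be queried from MLJ without loading any model code. A minimal sketch, assuming MLJ's registry query function `info` and that the returned named tuple exposes the trait fields listed in this file:

```julia
using MLJ

# Look up the registry entry by model name and package (no XGBoost code is loaded):
meta = info("XGBoostClassifier", pkg="XGBoost")

meta.target_scitype   # AbstractVector{<:Finite}, matching ":target_scitype" above
meta.is_pure_julia    # false, since the model wraps the xgboost C++ library
```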
Provide keyword arguments to override hyper-parameter defaults, as in `XGBoostClassifier(test=...)`.\n\nUnivariate classification using [xgboost](https://xgboost.readthedocs.io/en/stable/index.html).\n\n# Training data\n\nIn `MLJ` or `MLJBase`, bind an instance `model` to data with\n\n```julia\nm = machine(model, X, y)\n```\n\nwhere\n\n * `X`: the input features, presented either as an `AbstractMatrix` or as a Tables.jl-compatible table.\n * `y`: an `AbstractVector` target with `Finite` element scitype.\n\nTrain using `fit!(m, rows=...)`.\n\n# Hyper-parameters\n\nSee https://xgboost.readthedocs.io/en/stable/parameter.html.\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/dmlc/XGBoost.jl" +":package_name" = "XGBoost" +":name" = "XGBoostClassifier" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMultitargetKNNClassifier\n```\n\nA model type for constructing a multitarget K-nearest neighbor classifier, based on [NearestNeighborModels.jl](https://github.com/JuliaAI/NearestNeighborModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetKNNClassifier = @load MultitargetKNNClassifier pkg=NearestNeighborModels\n```\n\nDo `model = MultitargetKNNClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetKNNClassifier(K=...)`.\n\nMulti-target K-Nearest Neighbors Classifier (MultitargetKNNClassifier) is a variation of [`KNNClassifier`](@ref) that assumes the target variable is vector-valued with `Multiclass` or `OrderedFactor` components. (Target data must be presented as a table, however.)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `<:Finite` (`<:Multiclass` or `<:OrderedFactor` will do); check the column scitypes with `schema(y)`. Each column of `y` is assumed to belong to a common categorical pool.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. This is different from `weights` kernel which is a model hyperparameter, see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `K::Int=5` : number of neighbors\n * `algorithm::Symbol = :kdtree` : one of `(:kdtree, :brutetree, :balltree)`\n * `metric::Metric = Euclidean()` : any `Metric` from [Distances.jl](https://github.com/JuliaStats/Distances.jl) for the distance between points. For `algorithm = :kdtree` only metrics which are instances of `Distances.UnionMinkowskiMetric` are supported.\n * `leafsize::Int = 10` : determines the number of points at which to stop splitting the tree.
This option is ignored and always taken as `0` for `algorithm = :brutetree`, since `brutetree` isn't actually a tree.\n * `reorder::Bool = true` : if `true` then points which are close in distance are placed close in memory. In this case, a copy of the original data will be made so that the original data is left unmodified. Setting this to `true` can significantly improve performance of the specified `algorithm` (except `:brutetree`). This option is ignored and always taken as `false` for `algorithm = :brutetree`.\n * `weights::KNNKernel=Uniform()` : kernel used in assigning weights to the k-nearest neighbors for each observation. An instance of one of the types in `list_kernels()`. User-defined weighting functions can be passed by wrapping the function in a [`UserDefinedKernel`](@ref) kernel (do `?NearestNeighborModels.UserDefinedKernel` for more info). If observation weights `w` are passed during machine construction then the weight assigned to each neighbor vote is the product of the kernel-generated weight for that neighbor and the corresponding observation weight.\n\n * `output_type::Type{<:MultiUnivariateFinite}=DictTable` : One of (`ColumnTable`, `DictTable`). The type of table to use for predictions. Setting to `ColumnTable` might improve performance for narrow tables while setting to `DictTable` improves performance for wide tables.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are either a `ColumnTable` or `DictTable` of `UnivariateFiniteVector` columns depending on the value set for the `output_type` parameter discussed above. The probabilistic predictions are uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of each column of the table of probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: An instance of either `KDTree`, `BruteTree` or `BallTree` depending on the value of the `algorithm` hyperparameter (See hyper-parameters section above).
These are data structures that store the training data so as to speed up nearest neighbor searches on test data points.\n\n# Examples\n\n```\nusing MLJ, StableRNGs\n\n# set rng for reproducibility\nrng = StableRNG(10)\n\n# Dataset generation\nn, p = 10, 3\nX = table(randn(rng, n, p)) # feature table\nfruit, color = categorical([\"apple\", \"orange\"]), categorical([\"blue\", \"green\"])\ny = [(fruit = rand(rng, fruit), color = rand(rng, color)) for _ in 1:n] # target_table\n# Each column in y has a common categorical pool as expected\nselectcols(y, :fruit) # categorical array\nselectcols(y, :color) # categorical array\n\n# Load MultitargetKNNClassifier\nMultitargetKNNClassifier = @load MultitargetKNNClassifier pkg=NearestNeighborModels\n\n# view possible kernels\nNearestNeighborModels.list_kernels()\n\n# MultitargetKNNClassifier instantiation\nmodel = MultitargetKNNClassifier(K=3, weights = NearestNeighborModels.Inverse())\n\n# wrap model and required data in an MLJ machine and fit\nmach = machine(model, X, y) |> fit!\n\n# predict\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`KNNClassifier`](@ref)\n""" -":name" = "MultitargetKNNClassifier" -":human_name" = "multitarget K-nearest neighbor classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mode"] -":hyperparameters" = "`(:K, :algorithm, :metric, :leafsize, :reorder, :weights, :output_type)`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\", \"Type{<:Union{AbstractDict{<:AbstractString, <:AbstractVector}, AbstractDict{Symbol, <:AbstractVector}, NamedTuple{names, T} where {N, names, T<:Tuple{Vararg{AbstractVector, N}}}}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":predict"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`true`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[NearestNeighborModels.MultitargetKNNRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[MLJScikitLearnInterface.ProbabilisticSGDClassifier] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "BSD" +":prediction_type" = ":probabilistic" +":load_path" = "MLJScikitLearnInterface.ProbabilisticSGDClassifier" +":hyperparameters" = "`(:loss, :penalty, :alpha, :l1_ratio, :fit_intercept, :max_iter, :tol, :shuffle, :verbose, :epsilon, :n_jobs, :random_state, :learning_rate, :eta0, :power_t, :early_stopping, :validation_fraction, :n_iter_no_change, :class_weight, :warm_start, :average)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic sgd classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nProbabilisticSGDClassifier\n```\n\nA model type for constructing a probabilistic sgd classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nProbabilisticSGDClassifier = @load ProbabilisticSGDClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = ProbabilisticSGDClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`ProbabilisticSGDClassifier(loss=...)`.\n# Hyper-parameters\n\n- `loss = log_loss`\n\n- `penalty = l2`\n\n- `alpha = 0.0001`\n\n- `l1_ratio = 0.15`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.001`\n\n- `shuffle = true`\n\n- `verbose = 0`\n\n- `epsilon = 0.1`\n\n- `n_jobs = nothing`\n\n- `random_state = nothing`\n\n- `learning_rate = optimal`\n\n- `eta0 = 0.0`\n\n- `power_t = 0.5`\n\n- `early_stopping = false`\n\n- `validation_fraction = 0.1`\n\n- `n_iter_no_change = 5`\n\n- `class_weight = nothing`\n\n- `warm_start = false`\n\n- `average = false`\n\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "ProbabilisticSGDClassifier" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMultitargetKNNRegressor\n```\n\nA model type for constructing a multitarget K-nearest neighbor regressor, based on [NearestNeighborModels.jl](https://github.com/JuliaAI/NearestNeighborModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetKNNRegressor = @load MultitargetKNNRegressor pkg=NearestNeighborModels\n```\n\nDo `model = MultitargetKNNRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetKNNRegressor(K=...)`.\n\nMulti-target K-Nearest Neighbors regressor (MultitargetKNNRegressor) is a variation of [`KNNRegressor`](@ref) that assumes the target variable is vector-valued with `Continuous` components. (Target data must be presented as a table, however.)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check column scitypes with `schema(y)`.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. This is different from `weights` kernel which is a model hyperparameter, see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `K::Int=5` : number of neighbors\n * `algorithm::Symbol = :kdtree` : one of `(:kdtree, :brutetree, :balltree)`\n * `metric::Metric = Euclidean()` : any `Metric` from [Distances.jl](https://github.com/JuliaStats/Distances.jl) for the distance between points. For `algorithm = :kdtree` only metrics which are instances of `Distances.UnionMinkowskiMetric` are supported.\n * `leafsize::Int = 10` : determines the number of points at which to stop splitting the tree.
This option is ignored and always taken as `0` for `algorithm = :brutetree`, since `brutetree` isn't actually a tree.\n * `reorder::Bool = true` : if `true` then points which are close in distance are placed close in memory. In this case, a copy of the original data will be made so that the original data is left unmodified. Setting this to `true` can significantly improve performance of the specified `algorithm` (except `:brutetree`). This option is ignored and always taken as `false` for `algorithm = :brutetree`.\n * `weights::KNNKernel=Uniform()` : kernel used in assigning weights to the k-nearest neighbors for each observation. An instance of one of the types in `list_kernels()`. User-defined weighting functions can be passed by wrapping the function in a [`UserDefinedKernel`](@ref) kernel (do `?NearestNeighborModels.UserDefinedKernel` for more info). If observation weights `w` are passed during machine construction then the weight assigned to each neighbor vote is the product of the kernel-generated weight for that neighbor and the corresponding observation weight.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: An instance of either `KDTree`, `BruteTree` or `BallTree` depending on the value of the `algorithm` hyperparameter (See hyper-parameters section above). These are data structures that store the training data so as to speed up nearest neighbor searches on test data points.\n\n# Examples\n\n```\nusing MLJ\n\n# Create Data\nX, y = make_regression(10, 5, n_targets=2)\n\n# load MultitargetKNNRegressor\nMultitargetKNNRegressor = @load MultitargetKNNRegressor pkg=NearestNeighborModels\n\n# view possible kernels\nNearestNeighborModels.list_kernels()\n\n# MultitargetKNNRegressor instantiation\nmodel = MultitargetKNNRegressor(weights = NearestNeighborModels.Inverse())\n\n# Wrap model and required data in an MLJ machine and fit.\nmach = machine(model, X, y) |> fit!
\n\n# Predict\ny_hat = predict(mach, X)\n\n```\n\nSee also [`KNNRegressor`](@ref)\n""" -":name" = "MultitargetKNNRegressor" -":human_name" = "multitarget K-nearest neighbor regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:K, :algorithm, :metric, :leafsize, :reorder, :weights)`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[NearestNeighborModels.KNNRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "NearestNeighborModels" -":package_license" = "MIT" -":load_path" = "NearestNeighborModels.KNNRegressor" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaAI/NearestNeighborModels.jl" ":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nKNNRegressor\n```\n\nA model type for constructing a K-nearest neighbor regressor, based on [NearestNeighborModels.jl](https://github.com/JuliaAI/NearestNeighborModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKNNRegressor = @load KNNRegressor pkg=NearestNeighborModels\n```\n\nDo `model = KNNRegressor()` to construct an instance with default hyper-parameters. 
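The KNNRegressor whose docstring continues below predicts by a kernel-weighted average of the K nearest responses. A toy sketch of that arithmetic; the helper `knn_average` is hypothetical and not part of NearestNeighborModels:

```julia
# Weighted average of neighbour responses ys with kernel weights ws;
# a Uniform kernel corresponds to equal weights, i.e. a plain mean.
knn_average(ys, ws) = sum(ws .* ys) / sum(ws)

knn_average([1.0, 2.0, 4.0], [1.0, 1.0, 1.0])   # ≈ 2.33: uniform kernel
knn_average([1.0, 2.0, 4.0], [1.0, 0.5, 0.25])  # ≈ 1.71: closer neighbours dominate
```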
Provide keyword arguments to override hyper-parameter defaults, as in `KNNRegressor(K=...)`.\n\nKNNRegressor implements the [K-Nearest Neighbors regressor](https://en.wikipedia.org/wiki/K-nearest_neighbor_algorithm), which is a non-parametric algorithm that predicts the response associated with a new point by taking a weighted average of the responses of the K-nearest points.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. This is different from `weights` kernel which is a model hyperparameter, see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `K::Int=5` : number of neighbors\n * `algorithm::Symbol = :kdtree` : one of `(:kdtree, :brutetree, :balltree)`\n * `metric::Metric = Euclidean()` : any `Metric` from [Distances.jl](https://github.com/JuliaStats/Distances.jl) for the distance between points. For `algorithm = :kdtree` only metrics which are instances of `Distances.UnionMinkowskiMetric` are supported.\n * `leafsize::Int = 10` : determines the number of points at which to stop splitting the tree. This option is ignored and always taken as `0` for `algorithm = :brutetree`, since `brutetree` isn't actually a tree.\n * `reorder::Bool = true` : if `true` then points which are close in distance are placed close in memory. In this case, a copy of the original data will be made so that the original data is left unmodified. Setting this to `true` can significantly improve performance of the specified `algorithm` (except `:brutetree`). This option is ignored and always taken as `false` for `algorithm = :brutetree`.\n * `weights::KNNKernel=Uniform()` : kernel used in assigning weights to the k-nearest neighbors for each observation. An instance of one of the types in `list_kernels()`. User-defined weighting functions can be passed by wrapping the function in a [`UserDefinedKernel`](@ref) kernel (do `?NearestNeighborModels.UserDefinedKernel` for more info). If observation weights `w` are passed during machine construction then the weight assigned to each neighbor vote is the product of the kernel-generated weight for that neighbor and the corresponding observation weight.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: An instance of either `KDTree`, `BruteTree` or `BallTree` depending on the value of the `algorithm` hyperparameter (See hyper-parameters section above).
These are data structures that store the training data so as to speed up nearest neighbor searches on test data points.\n\n# Examples\n\n```\nusing MLJ\nKNNRegressor = @load KNNRegressor pkg=NearestNeighborModels\nX, y = @load_boston; # loads the Boston housing dataset from MLJBase\n# view possible kernels\nNearestNeighborModels.list_kernels()\nmodel = KNNRegressor(weights = NearestNeighborModels.Inverse()) # KNNRegressor instantiation\nmach = machine(model, X, y) |> fit! # wrap model and required data in an MLJ machine and fit\ny_hat = predict(mach, X)\n\n```\n\nSee also [`MultitargetKNNRegressor`](@ref)\n""" -":name" = "KNNRegressor" -":human_name" = "K-nearest neighbor regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:K, :algorithm, :metric, :leafsize, :reorder, :weights)`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" + +[MLJScikitLearnInterface.RidgeCVClassifier] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"AbstractArray{Float64}\", \"Bool\", \"Any\", \"Int64\", \"Any\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" ":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJScikitLearnInterface.ProbabilisticSGDClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" ":package_license" = "BSD" -":load_path" = "MLJScikitLearnInterface.ProbabilisticSGDClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nProbabilisticSGDClassifier\n```\n\nA model type for constructing a probabilistic sgd classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nProbabilisticSGDClassifier = @load ProbabilisticSGDClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = ProbabilisticSGDClassifier()` to construct an instance with default hyper-parameters.
Provide keyword arguments to override hyper-parameter defaults, as in\n`ProbabilisticSGDClassifier(loss=...)`.\n# Hyper-parameters\n\n- `loss = log_loss`\n\n- `penalty = l2`\n\n- `alpha = 0.0001`\n\n- `l1_ratio = 0.15`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.001`\n\n- `shuffle = true`\n\n- `verbose = 0`\n\n- `epsilon = 0.1`\n\n- `n_jobs = nothing`\n\n- `random_state = nothing`\n\n- `learning_rate = optimal`\n\n- `eta0 = 0.0`\n\n- `power_t = 0.5`\n\n- `early_stopping = false`\n\n- `validation_fraction = 0.1`\n\n- `n_iter_no_change = 5`\n\n- `class_weight = nothing`\n\n- `warm_start = false`\n\n- `average = false`\n\n""" -":name" = "ProbabilisticSGDClassifier" -":human_name" = "probabilistic sgd classifier" +":prediction_type" = ":deterministic" +":load_path" = "MLJScikitLearnInterface.RidgeCVClassifier" +":hyperparameters" = "`(:alphas, :fit_intercept, :scoring, :cv, :class_weight, :store_cv_values)`" +":is_pure_julia" = "`false`" +":human_name" = "ridge regression classifier with built-in cross-validation" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:loss, :penalty, :alpha, :l1_ratio, :fit_intercept, :max_iter, :tol, :shuffle, :verbose, :epsilon, :n_jobs, :random_state, :learning_rate, :eta0, :power_t, :early_stopping, :validation_fraction, :n_iter_no_change, :class_weight, :warm_start, :average)`" -":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJScikitLearnInterface.RidgeCVClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":docstring" = """```\nRidgeCVClassifier\n```\n\nA model type for constructing a ridge regression classifier with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRidgeCVClassifier = @load RidgeCVClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = RidgeCVClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`RidgeCVClassifier(alphas=...)`.\n# Hyper-parameters\n\n- `alphas = [0.1, 1.0, 10.0]`\n\n- `fit_intercept = true`\n\n- `scoring = nothing`\n\n- `cv = 5`\n\n- `class_weight = nothing`\n\n- `store_cv_values = false`\n\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" -":package_license" = "BSD" -":load_path" = "MLJScikitLearnInterface.RidgeCVClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "RidgeCVClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nRidgeCVClassifier\n```\n\nA model type for constructing a ridge regression classifier with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRidgeCVClassifier = @load RidgeCVClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = RidgeCVClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`RidgeCVClassifier(alphas=...)`.\n# Hyper-parameters\n\n- `alphas = [0.1, 1.0, 10.0]`\n\n- `fit_intercept = true`\n\n- `scoring = nothing`\n\n- `cv = 5`\n\n- `class_weight = nothing`\n\n- `store_cv_values = false`\n\n""" -":name" = "RidgeCVClassifier" -":human_name" = "ridge regression classifier with built-in cross-validation" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:alphas, :fit_intercept, :scoring, :cv, :class_weight, :store_cv_values)`" -":hyperparameter_types" = "`(\"AbstractArray{Float64}\", \"Bool\", \"Any\", \"Int64\", \"Any\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.LogisticClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"Bool\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Any\", \"Any\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Union{Nothing, Int64}\", \"Union{Nothing, Float64}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.LogisticClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:penalty, :dual, :tol, :C, :fit_intercept, :intercept_scaling, :class_weight, :random_state, :solver, :max_iter, :multi_class, :verbose, :warm_start, :n_jobs, :l1_ratio)`" +":is_pure_julia" = "`false`" +":human_name" = "logistic regression classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLogisticClassifier\n```\n\nA model type for constructing a logistic regression classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLogisticClassifier = @load LogisticClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = LogisticClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LogisticClassifier(penalty=...)`.\n# Hyper-parameters\n\n- `penalty = l2`\n\n- `dual = false`\n\n- `tol = 0.0001`\n\n- `C = 1.0`\n\n- `fit_intercept = true`\n\n- `intercept_scaling = 1.0`\n\n- `class_weight = nothing`\n\n- `random_state = nothing`\n\n- `solver = lbfgs`\n\n- `max_iter = 100`\n\n- `multi_class = auto`\n\n- `verbose = 0`\n\n- `warm_start = false`\n\n- `n_jobs = nothing`\n\n- `l1_ratio = nothing`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "LogisticClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLogisticClassifier\n```\n\nA model type for constructing a logistic regression classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLogisticClassifier = @load LogisticClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = LogisticClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`LogisticClassifier(penalty=...)`.\n# Hyper-parameters\n\n- `penalty = l2`\n\n- `dual = false`\n\n- `tol = 0.0001`\n\n- `C = 1.0`\n\n- `fit_intercept = true`\n\n- `intercept_scaling = 1.0`\n\n- `class_weight = nothing`\n\n- `random_state = nothing`\n\n- `solver = lbfgs`\n\n- `max_iter = 100`\n\n- `multi_class = auto`\n\n- `verbose = 0`\n\n- `warm_start = false`\n\n- `n_jobs = nothing`\n\n- `l1_ratio = nothing`\n\n""" -":name" = "LogisticClassifier" -":human_name" = "logistic regression classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:penalty, :dual, :tol, :C, :fit_intercept, :intercept_scaling, :class_weight, :random_state, :solver, :max_iter, :multi_class, :verbose, :warm_start, :n_jobs, :l1_ratio)`" -":hyperparameter_types" = "`(\"String\", \"Bool\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Any\", \"Any\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Union{Nothing, Int64}\", \"Union{Nothing, Float64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.RandomForestRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" 
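For orientation, a minimal usage sketch consistent with the LogisticClassifier entry above (probabilistic predictions over a `Finite` target); `@load_iris` and the hyper-parameter value are illustrative, and the measure call assumes MLJ's built-in `log_loss`:

```julia
using MLJ

LogisticClassifier = @load LogisticClassifier pkg=MLJScikitLearnInterface
X, y = @load_iris

mach = machine(LogisticClassifier(C=1.0), X, y) |> fit!

yhat = predict(mach, X)  # vector of UnivariateFinite distributions, per ":predict_scitype"
log_loss(yhat, y)        # evaluate the probabilistic predictions
```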
+":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.RandomForestRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nRandomForestRegressor\n```\n\nA model type for constructing a random forest regressor, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomForestRegressor = @load RandomForestRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = RandomForestRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomForestRegressor(n_estimators=...)`.\n\nA random forest is a meta estimator that fits a number of classifying decision trees on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting. The sub-sample size is controlled with the `max_samples` parameter if `bootstrap=True` (default), otherwise the whole dataset is used to build each tree.\n""" -":name" = "RandomForestRegressor" +":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :ccp_alpha, :max_samples, :monotonic_cst)`" +":is_pure_julia" = "`false`" ":human_name" = "random forest regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :ccp_alpha, :max_samples, :monotonic_cst)`" -":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nRandomForestRegressor\n```\n\nA model type for constructing a random forest regressor, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomForestRegressor = @load RandomForestRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = RandomForestRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `RandomForestRegressor(n_estimators=...)`.\n\nA random forest is a meta estimator that fits a number of decision tree regressors on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting. The sub-sample size is controlled with the `max_samples` parameter if `bootstrap=true` (default), otherwise the whole dataset is used to build each tree.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "RandomForestRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.ElasticNetCVRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Bool\", \"Any\", \"String\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" ":package_license" = "BSD" -":load_path" = "MLJScikitLearnInterface.ElasticNetCVRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nElasticNetCVRegressor\n```\n\nA model type for constructing an elastic net regression with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and
implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nElasticNetCVRegressor = @load ElasticNetCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = ElasticNetCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`ElasticNetCVRegressor(l1_ratio=...)`.\n# Hyper-parameters\n\n- `l1_ratio = 0.5`\n\n- `eps = 0.001`\n\n- `n_alphas = 100`\n\n- `alphas = nothing`\n\n- `fit_intercept = true`\n\n- `precompute = auto`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `cv = 5`\n\n- `copy_X = true`\n\n- `verbose = 0`\n\n- `n_jobs = nothing`\n\n- `positive = false`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" -":name" = "ElasticNetCVRegressor" +":hyperparameters" = "`(:l1_ratio, :eps, :n_alphas, :alphas, :fit_intercept, :precompute, :max_iter, :tol, :cv, :copy_X, :verbose, :n_jobs, :positive, :random_state, :selection)`" +":is_pure_julia" = "`false`" ":human_name" = "elastic net regression with built-in cross-validation" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:l1_ratio, :eps, :n_alphas, :alphas, :fit_intercept, :precompute, :max_iter, :tol, :cv, :copy_X, :verbose, :n_jobs, :positive, :random_state, :selection)`" -":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Bool\", \"Any\", \"String\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nElasticNetCVRegressor\n```\n\nA model type for constructing an elastic net regression with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nElasticNetCVRegressor = @load ElasticNetCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = ElasticNetCVRegressor()` to construct an instance with default hyper-parameters.
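A sketch of the workflow the ElasticNetCVRegressor docstring implies, assuming `fitted_params` exposes the attributes of the fitted scikit-learn estimator; the data and hyper-parameter values are illustrative:

```julia
using MLJ

ElasticNetCVRegressor = @load ElasticNetCVRegressor pkg=MLJScikitLearnInterface
X, y = make_regression(100, 4)

model = ElasticNetCVRegressor(l1_ratio=0.5, n_alphas=100, cv=5)
mach = machine(model, X, y) |> fit!

fitted_params(mach)  # inspect coefficients and the alpha selected by internal CV
```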
Provide keyword arguments to override hyper-parameter defaults, as in\n`ElasticNetCVRegressor(l1_ratio=...)`.\n# Hyper-parameters\n\n- `l1_ratio = 0.5`\n\n- `eps = 0.001`\n\n- `n_alphas = 100`\n\n- `alphas = nothing`\n\n- `fit_intercept = true`\n\n- `precompute = auto`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `cv = 5`\n\n- `copy_X = true`\n\n- `verbose = 0`\n\n- `n_jobs = nothing`\n\n- `positive = false`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" -":package_license" = "BSD" -":load_path" = "MLJScikitLearnInterface.PerceptronClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "ElasticNetCVRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nPerceptronClassifier\n```\n\nA model type for constructing a perceptron classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nPerceptronClassifier = @load PerceptronClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = PerceptronClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`PerceptronClassifier(penalty=...)`.\n# Hyper-parameters\n\n- `penalty = nothing`\n\n- `alpha = 0.0001`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.001`\n\n- `shuffle = true`\n\n- `verbose = 0`\n\n- `eta0 = 1.0`\n\n- `n_jobs = nothing`\n\n- `random_state = 0`\n\n- `early_stopping = false`\n\n- `validation_fraction = 0.1`\n\n- `n_iter_no_change = 5`\n\n- `class_weight = nothing`\n\n- `warm_start = false`\n\n""" -":name" = "PerceptronClassifier" -":human_name" = "perceptron classifier" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:penalty, :alpha, :fit_intercept, :max_iter, :tol, :shuffle, :verbose, :eta0, :n_jobs, :random_state, :early_stopping, :validation_fraction, :n_iter_no_change, :class_weight, :warm_start)`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[MLJScikitLearnInterface.PerceptronClassifier] +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Union{Nothing, String}\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "BSD" +":prediction_type" = ":deterministic" +":load_path" = "MLJScikitLearnInterface.PerceptronClassifier" +":hyperparameters" = "`(:penalty, :alpha, :fit_intercept, :max_iter, :tol, :shuffle, :verbose, :eta0, :n_jobs, :random_state, :early_stopping, :validation_fraction, :n_iter_no_change, :class_weight, :warm_start)`" +":is_pure_julia" = "`false`" +":human_name" = "perceptron classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nPerceptronClassifier\n```\n\nA model type for constructing a perceptron classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nPerceptronClassifier = @load PerceptronClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = PerceptronClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`PerceptronClassifier(penalty=...)`.\n# Hyper-parameters\n\n- `penalty = nothing`\n\n- `alpha = 0.0001`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.001`\n\n- `shuffle = true`\n\n- `verbose = 0`\n\n- `eta0 = 1.0`\n\n- `n_jobs = nothing`\n\n- `random_state = 0`\n\n- `early_stopping = false`\n\n- `validation_fraction = 0.1`\n\n- `n_iter_no_change = 5`\n\n- `class_weight = nothing`\n\n- `warm_start = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "PerceptronClassifier" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.MultiTaskLassoRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"String\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.MultiTaskLassoRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nMultiTaskLassoRegressor\n```\n\nA model type for constructing a multi-target lasso regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultiTaskLassoRegressor = @load MultiTaskLassoRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = MultiTaskLassoRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`MultiTaskLassoRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `copy_X = true`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" -":name" = "MultiTaskLassoRegressor" +":hyperparameters" = "`(:alpha, :fit_intercept, :max_iter, :tol, :copy_X, :random_state, :selection)`" +":is_pure_julia" = "`false`" ":human_name" = "multi-target lasso regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:alpha, :fit_intercept, :max_iter, :tol, :copy_X, :random_state, :selection)`" -":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"String\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultiTaskLassoRegressor\n```\n\nA model type for constructing a multi-target lasso regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultiTaskLassoRegressor = @load MultiTaskLassoRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = MultiTaskLassoRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`MultiTaskLassoRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `copy_X = true`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "MultiTaskLassoRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.LinearRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Int64}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.LinearRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a ordinary least-squares regressor (OLS), based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLinearRegressor = @load LinearRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`LinearRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `n_jobs = nothing`\n\n""" -":name" = "LinearRegressor" +":hyperparameters" = "`(:fit_intercept, :copy_X, :n_jobs)`" +":is_pure_julia" = "`false`" ":human_name" = "ordinary least-squares regressor (OLS)" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:fit_intercept, :copy_X, :n_jobs)`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing an ordinary least-squares regressor (OLS), based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLinearRegressor = @load LinearRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LinearRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `n_jobs = nothing`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "LinearRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.HDBSCAN] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, Int64}\", \"String\", \"Float64\", \"String\", \"Int64\", \"String\", \"Bool\", \"Union{Nothing, String}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" 
-":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "BSD" +":prediction_type" = ":unknown" ":load_path" = "MLJScikitLearnInterface.HDBSCAN" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:min_cluster_size, :min_samples, :cluster_selection_epsilon, :max_cluster_size, :metric, :alpha, :algorithm, :leaf_size, :cluster_selection_method, :allow_single_cluster, :store_centers)`" +":is_pure_julia" = "`false`" +":human_name" = "hdbscan" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nHDBSCAN\n```\n\nA model type for constructing a hdbscan, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nHDBSCAN = @load HDBSCAN pkg=MLJScikitLearnInterface\n```\n\nDo `model = HDBSCAN()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `HDBSCAN(min_cluster_size=...)`.\n\nHierarchical Density-Based Spatial Clustering of Applications with Noise. Performs [`DBSCAN`](@ref) over varying epsilon values and integrates the result to find a clustering that gives the best stability over epsilon. This allows HDBSCAN to find clusters of varying densities (unlike [`DBSCAN`](@ref)), and be more robust to parameter selection. \n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "HDBSCAN" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nHDBSCAN\n```\n\nA model type for constructing a hdbscan, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nHDBSCAN = @load HDBSCAN pkg=MLJScikitLearnInterface\n```\n\nDo `model = HDBSCAN()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `HDBSCAN(min_cluster_size=...)`.\n\nHierarchical Density-Based Spatial Clustering of Applications with Noise. Performs [`DBSCAN`](@ref) over varying epsilon values and integrates the result to find a clustering that gives the best stability over epsilon. This allows HDBSCAN to find clusters of varying densities (unlike [`DBSCAN`](@ref)), and be more robust to parameter selection. 
\n""" -":name" = "HDBSCAN" -":human_name" = "hdbscan" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params"] -":hyperparameters" = "`(:min_cluster_size, :min_samples, :cluster_selection_epsilon, :max_cluster_size, :metric, :alpha, :algorithm, :leaf_size, :cluster_selection_method, :allow_single_cluster, :store_centers)`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, Int64}\", \"String\", \"Float64\", \"String\", \"Int64\", \"String\", \"Bool\", \"Union{Nothing, String}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.DBSCAN] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "BSD" +":prediction_type" = ":unknown" ":load_path" = "MLJScikitLearnInterface.DBSCAN" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nDBSCAN\n```\n\nA model type for constructing a dbscan, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDBSCAN = @load DBSCAN pkg=MLJScikitLearnInterface\n```\n\nDo `model = DBSCAN()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DBSCAN(eps=...)`.\n\nDensity-Based Spatial Clustering of Applications with Noise. Finds core samples of high density and expands clusters from them. 
Good for data which contains clusters of similar density.\n""" -":name" = "DBSCAN" +":hyperparameters" = "`(:eps, :min_samples, :metric, :algorithm, :leaf_size, :p, :n_jobs)`" +":is_pure_julia" = "`false`" ":human_name" = "dbscan" ":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params"] -":hyperparameters" = "`(:eps, :min_samples, :metric, :algorithm, :leaf_size, :p, :n_jobs)`" -":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":docstring" = """```\nDBSCAN\n```\n\nA model type for constructing a dbscan, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDBSCAN = @load DBSCAN pkg=MLJScikitLearnInterface\n```\n\nDo `model = DBSCAN()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DBSCAN(eps=...)`.\n\nDensity-Based Spatial Clustering of Applications with Noise. Finds core samples of high density and expands clusters from them. Good for data which contains clusters of similar density.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "DBSCAN" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.RidgeRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"String\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" 
+":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.RidgeRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRidgeRegressor = @load RidgeRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`RidgeRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `solver = auto`\n\n- `random_state = nothing`\n\n""" -":name" = "RidgeRegressor" +":hyperparameters" = "`(:alpha, :fit_intercept, :copy_X, :max_iter, :tol, :solver, :random_state)`" +":is_pure_julia" = "`false`" ":human_name" = "ridge regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:alpha, :fit_intercept, :copy_X, :max_iter, :tol, :solver, :random_state)`" -":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"String\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRidgeRegressor = @load RidgeRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`RidgeRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `solver = auto`\n\n- `random_state = nothing`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "RidgeRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.LassoLarsICRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.LassoLarsICRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nLassoLarsICRegressor\n```\n\nA model type for constructing a Lasso model with LARS using BIC or AIC for model selection, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoLarsICRegressor = @load LassoLarsICRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoLarsICRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoLarsICRegressor(criterion=...)`.\n# Hyper-parameters\n\n- `criterion = aic`\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `precompute = auto`\n\n- `max_iter = 500`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `positive = false`\n\n""" -":name" = "LassoLarsICRegressor" +":hyperparameters" = "`(:criterion, :fit_intercept, :verbose, :precompute, :max_iter, :eps, :copy_X, :positive)`" +":is_pure_julia" = "`false`" ":human_name" = "Lasso model with LARS using BIC or AIC for model selection" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:criterion, :fit_intercept, :verbose, :precompute, :max_iter, :eps, :copy_X, :positive)`" -":hyperparameter_types" = "`(\"String\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nLassoLarsICRegressor\n```\n\nA model type for constructing a Lasso model with LARS using BIC or AIC for model selection, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoLarsICRegressor = @load LassoLarsICRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoLarsICRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoLarsICRegressor(criterion=...)`.\n# Hyper-parameters\n\n- `criterion = aic`\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `precompute = auto`\n\n- `max_iter = 500`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `positive = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "LassoLarsICRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.ARDRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Bool\", \"Bool\", \"Bool\")`" +":package_uuid" = 
"3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.ARDRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nARDRegressor\n```\n\nA model type for constructing a Bayesian ARD regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nARDRegressor = @load ARDRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = ARDRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`ARDRegressor(max_iter=...)`.\n# Hyper-parameters\n\n- `max_iter = 300`\n\n- `tol = 0.001`\n\n- `alpha_1 = 1.0e-6`\n\n- `alpha_2 = 1.0e-6`\n\n- `lambda_1 = 1.0e-6`\n\n- `lambda_2 = 1.0e-6`\n\n- `compute_score = false`\n\n- `threshold_lambda = 10000.0`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `verbose = false`\n\n""" -":name" = "ARDRegressor" +":hyperparameters" = "`(:max_iter, :tol, :alpha_1, :alpha_2, :lambda_1, :lambda_2, :compute_score, :threshold_lambda, :fit_intercept, :copy_X, :verbose)`" +":is_pure_julia" = "`false`" ":human_name" = "Bayesian ARD regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:max_iter, :tol, :alpha_1, :alpha_2, :lambda_1, :lambda_2, :compute_score, :threshold_lambda, :fit_intercept, :copy_X, :verbose)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Bool\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nARDRegressor\n```\n\nA model type for constructing a Bayesian ARD regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nARDRegressor = @load ARDRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = ARDRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`ARDRegressor(max_iter=...)`.\n# Hyper-parameters\n\n- `max_iter = 300`\n\n- `tol = 0.001`\n\n- `alpha_1 = 1.0e-6`\n\n- `alpha_2 = 1.0e-6`\n\n- `lambda_1 = 1.0e-6`\n\n- `lambda_2 = 1.0e-6`\n\n- `compute_score = false`\n\n- `threshold_lambda = 10000.0`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `verbose = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "ARDRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.SVMNuRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Int64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.SVMNuRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:nu, :C, :kernel, :degree, :gamma, :coef0, :shrinking, :tol, :cache_size, :max_iter)`" +":is_pure_julia" = "`false`" +":human_name" = "nu-support vector regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSVMNuRegressor\n```\n\nA model type for constructing a nu-support vector regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMNuRegressor = @load SVMNuRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMNuRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMNuRegressor(nu=...)`.\n# Hyper-parameters\n\n- `nu = 0.5`\n\n- `C = 1.0`\n\n- `kernel = rbf`\n\n- `degree = 3`\n\n- `gamma = scale`\n\n- `coef0 = 0.0`\n\n- `shrinking = true`\n\n- `tol = 0.001`\n\n- `cache_size = 200`\n\n- `max_iter = -1`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "SVMNuRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nSVMNuRegressor\n```\n\nA model type for constructing a nu-support vector regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMNuRegressor = @load SVMNuRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMNuRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMNuRegressor(nu=...)`.\n# Hyper-parameters\n\n- `nu = 0.5`\n\n- `C = 1.0`\n\n- `kernel = rbf`\n\n- `degree = 3`\n\n- `gamma = scale`\n\n- `coef0 = 0.0`\n\n- `shrinking = true`\n\n- `tol = 0.001`\n\n- `cache_size = 200`\n\n- `max_iter = -1`\n\n""" -":name" = "SVMNuRegressor" -":human_name" = "nu-support vector regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:nu, :C, :kernel, :degree, :gamma, :coef0, :shrinking, :tol, :cache_size, :max_iter)`" -":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.RidgeClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\", \"String\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" 
-":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.RidgeClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nRidgeClassifier\n```\n\nA model type for constructing a ridge regression classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRidgeClassifier = @load RidgeClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = RidgeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`RidgeClassifier(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `max_iter = nothing`\n\n- `tol = 0.001`\n\n- `class_weight = nothing`\n\n- `solver = auto`\n\n- `random_state = nothing`\n\n""" -":name" = "RidgeClassifier" +":hyperparameters" = "`(:alpha, :fit_intercept, :copy_X, :max_iter, :tol, :class_weight, :solver, :random_state)`" +":is_pure_julia" = "`false`" ":human_name" = "ridge regression classifier" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:alpha, :fit_intercept, :copy_X, :max_iter, :tol, :class_weight, :solver, :random_state)`" -":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\", \"String\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":docstring" = """```\nRidgeClassifier\n```\n\nA model type for constructing a ridge regression classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRidgeClassifier = @load RidgeClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = RidgeClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`RidgeClassifier(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `max_iter = nothing`\n\n- `tol = 0.001`\n\n- `class_weight = nothing`\n\n- `solver = auto`\n\n- `random_state = nothing`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "RidgeClassifier" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.SGDRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Union{Bool, Int64}\", \"Float64\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Union{Bool, Int64}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.SGDRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nSGDRegressor\n```\n\nA model type for constructing a stochastic gradient descent-based regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSGDRegressor = @load SGDRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = SGDRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`SGDRegressor(loss=...)`.\n# Hyper-parameters\n\n- `loss = squared_error`\n\n- `penalty = l2`\n\n- `alpha = 0.0001`\n\n- `l1_ratio = 0.15`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.001`\n\n- `shuffle = true`\n\n- `verbose = 0`\n\n- `epsilon = 0.1`\n\n- `random_state = nothing`\n\n- `learning_rate = invscaling`\n\n- `eta0 = 0.01`\n\n- `power_t = 0.25`\n\n- `early_stopping = false`\n\n- `validation_fraction = 0.1`\n\n- `n_iter_no_change = 5`\n\n- `warm_start = false`\n\n- `average = false`\n\n""" -":name" = "SGDRegressor" +":hyperparameters" = "`(:loss, :penalty, :alpha, :l1_ratio, :fit_intercept, :max_iter, :tol, :shuffle, :verbose, :epsilon, :random_state, :learning_rate, :eta0, :power_t, :early_stopping, :validation_fraction, :n_iter_no_change, :warm_start, :average)`" +":is_pure_julia" = "`false`" ":human_name" = "stochastic gradient descent-based regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:loss, :penalty, :alpha, :l1_ratio, :fit_intercept, :max_iter, :tol, :shuffle, :verbose, :epsilon, :random_state, :learning_rate, :eta0, :power_t, :early_stopping, :validation_fraction, :n_iter_no_change, :warm_start, :average)`" -":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Union{Bool, Int64}\", \"Float64\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Union{Bool, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nSGDRegressor\n```\n\nA model type for constructing a stochastic gradient descent-based regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSGDRegressor = @load SGDRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = SGDRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`SGDRegressor(loss=...)`.\n# Hyper-parameters\n\n- `loss = squared_error`\n\n- `penalty = l2`\n\n- `alpha = 0.0001`\n\n- `l1_ratio = 0.15`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.001`\n\n- `shuffle = true`\n\n- `verbose = 0`\n\n- `epsilon = 0.1`\n\n- `random_state = nothing`\n\n- `learning_rate = invscaling`\n\n- `eta0 = 0.01`\n\n- `power_t = 0.25`\n\n- `early_stopping = false`\n\n- `validation_fraction = 0.1`\n\n- `n_iter_no_change = 5`\n\n- `warm_start = false`\n\n- `average = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "SGDRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.ComplementNBClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Nothing, AbstractVector}\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.ComplementNBClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:alpha, :fit_prior, :class_prior, :norm)`" +":is_pure_julia" = "`false`" +":human_name" = "Complement naive Bayes classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nComplementNBClassifier\n```\n\nA model type for constructing a Complement naive Bayes classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nComplementNBClassifier = @load ComplementNBClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = 
ComplementNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ComplementNBClassifier(alpha=...)`.\n\nSimilar to [`MultinomialNBClassifier`](@ref) but with more robust assumptions. Suited for imbalanced datasets.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "ComplementNBClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nComplementNBClassifier\n```\n\nA model type for constructing a Complement naive Bayes classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nComplementNBClassifier = @load ComplementNBClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = ComplementNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ComplementNBClassifier(alpha=...)`.\n\nSimilar to [`MultinomialNBClassifier`](@ref) but with more robust assumptions. Suited for imbalanced datasets.\n""" -":name" = "ComplementNBClassifier" -":human_name" = "Complement naive Bayes classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:alpha, :fit_prior, :class_prior, :norm)`" -":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Nothing, AbstractVector}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.HuberRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Float64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" 
-":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.HuberRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nHuberRegressor\n```\n\nA model type for constructing a Huber regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nHuberRegressor = @load HuberRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = HuberRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`HuberRegressor(epsilon=...)`.\n# Hyper-parameters\n\n- `epsilon = 1.35`\n\n- `max_iter = 100`\n\n- `alpha = 0.0001`\n\n- `warm_start = false`\n\n- `fit_intercept = true`\n\n- `tol = 1.0e-5`\n\n""" -":name" = "HuberRegressor" +":hyperparameters" = "`(:epsilon, :max_iter, :alpha, :warm_start, :fit_intercept, :tol)`" +":is_pure_julia" = "`false`" ":human_name" = "Huber regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:epsilon, :max_iter, :alpha, :warm_start, :fit_intercept, :tol)`" -":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nHuberRegressor\n```\n\nA model type for constructing a Huber regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nHuberRegressor = @load HuberRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = HuberRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`HuberRegressor(epsilon=...)`.\n# Hyper-parameters\n\n- `epsilon = 1.35`\n\n- `max_iter = 100`\n\n- `alpha = 0.0001`\n\n- `warm_start = false`\n\n- `fit_intercept = true`\n\n- `tol = 1.0e-5`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "HuberRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.SVMNuClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.SVMNuClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:nu, :kernel, :degree, :gamma, :coef0, :shrinking, :tol, :cache_size, :max_iter, :decision_function_shape, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "nu-support vector classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSVMNuClassifier\n```\n\nA model type for constructing a nu-support vector classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMNuClassifier = @load SVMNuClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMNuClassifier()` to construct an instance with default hyper-parameters. 
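For the `HuberRegressor` entry just completed: a hedged sketch of overriding the listed defaults and querying feature importances, which the metadata above flags as reported; `make_regression` supplies synthetic data and the hyper-parameter values are arbitrary.

```julia
using MLJ

HuberRegressor = @load HuberRegressor pkg=MLJScikitLearnInterface

X, y = make_regression(100, 3)  # synthetic Continuous features and target

model = HuberRegressor(epsilon=1.5, max_iter=200)  # override two of the defaults
mach = machine(model, X, y)
fit!(mach)
predict(mach, X)

feature_importances(mach)  # available per ":reports_feature_importances" above
```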
Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMNuClassifier(nu=...)`.\n# Hyper-parameters\n\n- `nu = 0.5`\n\n- `kernel = rbf`\n\n- `degree = 3`\n\n- `gamma = scale`\n\n- `coef0 = 0.0`\n\n- `shrinking = true`\n\n- `tol = 0.001`\n\n- `cache_size = 200`\n\n- `max_iter = -1`\n\n- `decision_function_shape = ovr`\n\n- `random_state = nothing`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "SVMNuClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nSVMNuClassifier\n```\n\nA model type for constructing a nu-support vector classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMNuClassifier = @load SVMNuClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMNuClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMNuClassifier(nu=...)`.\n# Hyper-parameters\n\n- `nu = 0.5`\n\n- `kernel = rbf`\n\n- `degree = 3`\n\n- `gamma = scale`\n\n- `coef0 = 0.0`\n\n- `shrinking = true`\n\n- `tol = 0.001`\n\n- `cache_size = 200`\n\n- `max_iter = -1`\n\n- `decision_function_shape = ovr`\n\n- `random_state = nothing`\n\n""" -":name" = "SVMNuClassifier" -":human_name" = "nu-support vector classifier" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:nu, :kernel, :degree, :gamma, :coef0, :shrinking, :tol, :cache_size, :max_iter, :decision_function_shape, :random_state)`" -":hyperparameter_types" = "`(\"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.GradientBoostingClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\", \"Union{Nothing, Float64, Int64, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", 
\"Union{Nothing, Int64}\", \"Float64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.GradientBoostingClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nGradientBoostingClassifier\n```\n\nA model type for constructing a gradient boosting classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nGradientBoostingClassifier = @load GradientBoostingClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = GradientBoostingClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `GradientBoostingClassifier(loss=...)`.\n\nThis algorithm builds an additive model in a forward stage-wise fashion; it allows for the optimization of arbitrary differentiable loss functions. In each stage `n_classes_` regression trees are fit on the negative gradient of the loss function, e.g. binary or multiclass log loss. 
Binary classification is a special case where only a single regression tree is induced.\n\n[`HistGradientBoostingClassifier`](@ref) is a much faster variant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n""" -":name" = "GradientBoostingClassifier" +":hyperparameters" = "`(:loss, :learning_rate, :n_estimators, :subsample, :criterion, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_depth, :min_impurity_decrease, :init, :random_state, :max_features, :verbose, :max_leaf_nodes, :warm_start, :validation_fraction, :n_iter_no_change, :tol)`" +":is_pure_julia" = "`false`" ":human_name" = "gradient boosting classifier" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:loss, :learning_rate, :n_estimators, :subsample, :criterion, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_depth, :min_impurity_decrease, :init, :random_state, :max_features, :verbose, :max_leaf_nodes, :warm_start, :validation_fraction, :n_iter_no_change, :tol)`" -":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\", \"Union{Nothing, Float64, Int64, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":docstring" = """```\nGradientBoostingClassifier\n```\n\nA model type for constructing a gradient boosting classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nGradientBoostingClassifier = @load GradientBoostingClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = GradientBoostingClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `GradientBoostingClassifier(loss=...)`.\n\nThis algorithm builds an additive model in a forward stage-wise fashion; it allows for the optimization of arbitrary differentiable loss functions. In each stage `n_classes_` regression trees are fit on the negative gradient of the loss function, e.g. binary or multiclass log loss. 
Binary classification is a special case where only a single regression tree is induced.\n\n[`HistGradientBoostingClassifier`](@ref) is a much faster variant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "GradientBoostingClassifier" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.GaussianProcessRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Union{Float64, AbstractArray}\", \"Any\", \"Int64\", \"Bool\", \"Bool\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.GaussianProcessRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:kernel, :alpha, :optimizer, :n_restarts_optimizer, :normalize_y, :copy_X_train, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "Gaussian process regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nGaussianProcessRegressor\n```\n\nA model type for constructing a Gaussian process regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nGaussianProcessRegressor = @load GaussianProcessRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = GaussianProcessRegressor()` to construct an instance with default hyper-parameters. 
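Since `GradientBoostingClassifier` is registered as probabilistic (`":prediction_type" = ":probabilistic"`), a sketch of its distribution-valued predictions may help; the data and settings below are synthetic and illustrative only.

```julia
using MLJ

GradientBoostingClassifier = @load GradientBoostingClassifier pkg=MLJScikitLearnInterface

X, y = make_blobs(200, 4; centers=3)  # synthetic three-class problem

mach = machine(GradientBoostingClassifier(n_estimators=50, learning_rate=0.2), X, y)
fit!(mach)

yhat = predict(mach, X)  # vector of UnivariateFinite distributions
log_loss(yhat, y)        # proper scoring of the probabilistic output
predict_mode(mach, X)    # most probable class per observation
```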
Provide keyword arguments to override hyper-parameter defaults, as in\n`GaussianProcessRegressor(kernel=...)`.\n# Hyper-parameters\n\n- `kernel = nothing`\n\n- `alpha = 1.0e-10`\n\n- `optimizer = fmin_l_bfgs_b`\n\n- `n_restarts_optimizer = 0`\n\n- `normalize_y = false`\n\n- `copy_X_train = true`\n\n- `random_state = nothing`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "GaussianProcessRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nGaussianProcessRegressor\n```\n\nA model type for constructing a Gaussian process regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nGaussianProcessRegressor = @load GaussianProcessRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = GaussianProcessRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`GaussianProcessRegressor(kernel=...)`.\n# Hyper-parameters\n\n- `kernel = nothing`\n\n- `alpha = 1.0e-10`\n\n- `optimizer = fmin_l_bfgs_b`\n\n- `n_restarts_optimizer = 0`\n\n- `normalize_y = false`\n\n- `copy_X_train = true`\n\n- `random_state = nothing`\n\n""" -":name" = "GaussianProcessRegressor" -":human_name" = "Gaussian process regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :alpha, :optimizer, :n_restarts_optimizer, :normalize_y, :copy_X_train, :random_state)`" -":hyperparameter_types" = "`(\"Any\", \"Union{Float64, AbstractArray}\", \"Any\", \"Int64\", \"Bool\", \"Bool\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.SVMLinearRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Float64\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Any\", \"Int64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.SVMLinearRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nSVMLinearRegressor\n```\n\nA model type for constructing a linear support vector regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMLinearRegressor = @load SVMLinearRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMLinearRegressor(epsilon=...)`.\n# Hyper-parameters\n\n- `epsilon = 0.0`\n\n- `tol = 0.0001`\n\n- `C = 1.0`\n\n- `loss = epsilon_insensitive`\n\n- `fit_intercept = true`\n\n- `intercept_scaling = 1.0`\n\n- `dual = true`\n\n- `random_state = nothing`\n\n- `max_iter = 1000`\n\n""" -":name" = "SVMLinearRegressor" +":hyperparameters" = "`(:epsilon, :tol, :C, :loss, :fit_intercept, :intercept_scaling, :dual, :random_state, :max_iter)`" +":is_pure_julia" = "`false`" ":human_name" = "linear support vector regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:epsilon, :tol, :C, :loss, :fit_intercept, :intercept_scaling, :dual, :random_state, :max_iter)`" -":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Float64\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Any\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nSVMLinearRegressor\n```\n\nA model type for constructing a linear support vector regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMLinearRegressor = @load SVMLinearRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMLinearRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMLinearRegressor(epsilon=...)`.\n# Hyper-parameters\n\n- `epsilon = 0.0`\n\n- `tol = 0.0001`\n\n- `C = 1.0`\n\n- `loss = epsilon_insensitive`\n\n- `fit_intercept = true`\n\n- `intercept_scaling = 1.0`\n\n- `dual = true`\n\n- `random_state = nothing`\n\n- `max_iter = 1000`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "SVMLinearRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.LarsRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.LarsRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nLarsRegressor\n```\n\nA model type for constructing a least angle regressor (LARS), based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLarsRegressor = @load LarsRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LarsRegressor()` to construct an instance with default hyper-parameters. 
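A sketch of out-of-sample assessment for the `SVMLinearRegressor` entry above, using MLJ's generic `evaluate`; the fold count, measures, and hyper-parameter values are illustrative choices, not recommendations.

```julia
using MLJ

SVMLinearRegressor = @load SVMLinearRegressor pkg=MLJScikitLearnInterface

X, y = make_regression(150, 5)
model = SVMLinearRegressor(C=0.5, epsilon=0.1)

# 5-fold cross-validated estimates of two deterministic measures
evaluate(model, X, y, resampling=CV(nfolds=5), measure=[rms, mae])
```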
Provide keyword arguments to override hyper-parameter defaults, as in\n`LarsRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `precompute = auto`\n\n- `n_nonzero_coefs = 500`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `fit_path = true`\n\n""" -":name" = "LarsRegressor" +":hyperparameters" = "`(:fit_intercept, :verbose, :precompute, :n_nonzero_coefs, :eps, :copy_X, :fit_path)`" +":is_pure_julia" = "`false`" ":human_name" = "least angle regressor (LARS)" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:fit_intercept, :verbose, :precompute, :n_nonzero_coefs, :eps, :copy_X, :fit_path)`" -":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nLarsRegressor\n```\n\nA model type for constructing a least angle regressor (LARS), based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLarsRegressor = @load LarsRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LarsRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LarsRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `precompute = auto`\n\n- `n_nonzero_coefs = 500`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `fit_path = true`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "LarsRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.MeanShift] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Multiclass}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, Float64}\", \"Union{Nothing, AbstractArray}\", \"Bool\", \"Int64\", \"Bool\", \"Union{Nothing, Int64}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "BSD" +":prediction_type" = ":unknown" ":load_path" = "MLJScikitLearnInterface.MeanShift" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:bandwidth, :seeds, :bin_seeding, :min_bin_freq, :cluster_all, :n_jobs)`" +":is_pure_julia" = "`false`" +":human_name" = "mean shift" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMeanShift\n```\n\nA model type for constructing a mean shift, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMeanShift = @load MeanShift pkg=MLJScikitLearnInterface\n```\n\nDo `model = MeanShift()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MeanShift(bandwidth=...)`.\n\nMean shift clustering using a flat kernel. Mean shift clustering aims to discover \"blobs\" in a smooth density of samples. It is a centroid-based algorithm, which works by updating candidates for centroids to be the mean of the points within a given region. These candidates are then filtered in a post-processing stage to eliminate near-duplicates to form the final set of centroids.\"\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "MeanShift" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMeanShift\n```\n\nA model type for constructing a mean shift, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMeanShift = @load MeanShift pkg=MLJScikitLearnInterface\n```\n\nDo `model = MeanShift()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MeanShift(bandwidth=...)`.\n\nMean shift clustering using a flat kernel. Mean shift clustering aims to discover \"blobs\" in a smooth density of samples. It is a centroid-based algorithm, which works by updating candidates for centroids to be the mean of the points within a given region. 
These candidates are then filtered in a post-processing stage to eliminate near-duplicates to form the final set of centroids.\"\n""" -":name" = "MeanShift" -":human_name" = "mean shift" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:bandwidth, :seeds, :bin_seeding, :min_bin_freq, :cluster_all, :n_jobs)`" -":hyperparameter_types" = "`(\"Union{Nothing, Float64}\", \"Union{Nothing, AbstractArray}\", \"Bool\", \"Int64\", \"Bool\", \"Union{Nothing, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Multiclass}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.HistGradientBoostingClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Any\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.HistGradientBoostingClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nHistGradientBoostingClassifier\n```\n\nA model type for constructing a hist gradient boosting classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and 
implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nHistGradientBoostingClassifier = @load HistGradientBoostingClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = HistGradientBoostingClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `HistGradientBoostingClassifier(loss=...)`.\n\nThis algorithm builds an additive model in a forward stage-wise fashion; it allows for the optimization of arbitrary differentiable loss functions. In each stage `n_classes_` regression trees are fit on the negative gradient of the loss function, e.g. binary or multiclass log loss. Binary classification is a special case where only a single regression tree is induced.\n\n[`HistGradientBoostingClassifier`](@ref) is a much faster variant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n""" -":name" = "HistGradientBoostingClassifier" +":hyperparameters" = "`(:loss, :learning_rate, :max_iter, :max_leaf_nodes, :max_depth, :min_samples_leaf, :l2_regularization, :max_bins, :categorical_features, :monotonic_cst, :interaction_cst, :warm_start, :early_stopping, :scoring, :validation_fraction, :n_iter_no_change, :tol, :random_state, :class_weight)`" +":is_pure_julia" = "`false`" ":human_name" = "hist gradient boosting classifier" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:loss, :learning_rate, :max_iter, :max_leaf_nodes, :max_depth, :min_samples_leaf, :l2_regularization, :max_bins, :categorical_features, :monotonic_cst, :interaction_cst, :warm_start, :early_stopping, :scoring, :validation_fraction, :n_iter_no_change, :tol, :random_state, :class_weight)`" -":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Any\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":docstring" = """```\nHistGradientBoostingClassifier\n```\n\nA model type for constructing a hist gradient boosting classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nHistGradientBoostingClassifier = @load HistGradientBoostingClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = HistGradientBoostingClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `HistGradientBoostingClassifier(loss=...)`.\n\nThis algorithm builds an additive model in a forward stage-wise fashion; it allows for the optimization of arbitrary differentiable loss functions. In each stage `n_classes_` regression trees are fit on the negative gradient of the loss function, e.g. binary or multiclass log loss. 
Binary classification is a special case where only a single regression tree is induced.\n\nThis model is a much faster variant of [`GradientBoostingClassifier`](@ref) for intermediate datasets (`n_samples >= 10_000`).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "HistGradientBoostingClassifier" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.AdaBoostRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Float64\", \"String\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.AdaBoostRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nAdaBoostRegressor\n```\n\nA model type for constructing a AdaBoost ensemble regression, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nAdaBoostRegressor = @load AdaBoostRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = AdaBoostRegressor()` to construct an instance with default hyper-parameters. 
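For the `HistGradientBoostingClassifier` entry above, a sketch of its internal early stopping via the listed `early_stopping`, `validation_fraction`, and `n_iter_no_change` hyper-parameters; the binary toy data comes from `make_moons` and all values shown are illustrative.

```julia
using MLJ

HistGradientBoostingClassifier = @load HistGradientBoostingClassifier pkg=MLJScikitLearnInterface

X, y = make_moons(500)  # synthetic binary classification problem

model = HistGradientBoostingClassifier(
    max_iter=200,
    early_stopping=true,       # hold out data internally to monitor progress
    validation_fraction=0.15,
    n_iter_no_change=10,
)
mach = machine(model, X, y)
fit!(mach)
predict_mode(mach, X)
```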
Provide keyword arguments to override hyper-parameter defaults, as in `AdaBoostRegressor(estimator=...)`.\n\nAn AdaBoost regressor is a meta-estimator that begins by fitting a regressor on the original dataset and then fits additional copies of the regressor on the same dataset but where the weights of instances are adjusted according to the error of the current prediction. As such, subsequent regressors focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost.R2.\n""" -":name" = "AdaBoostRegressor" +":hyperparameters" = "`(:estimator, :n_estimators, :learning_rate, :loss, :random_state)`" +":is_pure_julia" = "`false`" ":human_name" = "AdaBoost ensemble regression" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:estimator, :n_estimators, :learning_rate, :loss, :random_state)`" -":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Float64\", \"String\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nAdaBoostRegressor\n```\n\nA model type for constructing an AdaBoost ensemble regression, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nAdaBoostRegressor = @load AdaBoostRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = AdaBoostRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `AdaBoostRegressor(estimator=...)`.\n\nAn AdaBoost regressor is a meta-estimator that begins by fitting a regressor on the original dataset and then fits additional copies of the regressor on the same dataset but where the weights of instances are adjusted according to the error of the current prediction. 
As such, subsequent regressors focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost.R2.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "AdaBoostRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.AffinityPropagation] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Multiclass}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Int64\", \"Bool\", \"Any\", \"String\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "BSD" +":prediction_type" = ":unknown" ":load_path" = "MLJScikitLearnInterface.AffinityPropagation" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:damping, :max_iter, :convergence_iter, :copy, :preference, :affinity, :verbose)`" +":is_pure_julia" = "`false`" +":human_name" = "Affinity Propagation Clustering of data" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nAffinityPropagation\n```\n\nA model type for constructing an Affinity Propagation Clustering of data, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nAffinityPropagation = @load AffinityPropagation pkg=MLJScikitLearnInterface\n```\n\nDo `model = AffinityPropagation()` to construct an instance with default hyper-parameters. 
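A sketch for the `AdaBoostRegressor` entry just closed, leaving `estimator` at the backend's default base learner; `loss="square"` is one of scikit-learn's documented loss options (`"linear"`, `"square"`, `"exponential"`) and the remaining values are invented.

```julia
using MLJ

AdaBoostRegressor = @load AdaBoostRegressor pkg=MLJScikitLearnInterface

X, y = make_regression(120, 4)

# re-weight training instances by squared prediction error
model = AdaBoostRegressor(n_estimators=100, learning_rate=0.5, loss="square")
mach = machine(model, X, y)
fit!(mach)
predict(mach, X)
```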
Provide keyword arguments to override hyper-parameter defaults, as in\n`AffinityPropagation(damping=...)`.\n# Hyper-parameters\n\n- `damping = 0.5`\n\n- `max_iter = 200`\n\n- `convergence_iter = 15`\n\n- `copy = true`\n\n- `preference = nothing`\n\n- `affinity = euclidean`\n\n- `verbose = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "AffinityPropagation" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nAffinityPropagation\n```\n\nA model type for constructing a Affinity Propagation Clustering of data, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nAffinityPropagation = @load AffinityPropagation pkg=MLJScikitLearnInterface\n```\n\nDo `model = AffinityPropagation()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`AffinityPropagation(damping=...)`.\n# Hyper-parameters\n\n- `damping = 0.5`\n\n- `max_iter = 200`\n\n- `convergence_iter = 15`\n\n- `copy = true`\n\n- `preference = nothing`\n\n- `affinity = euclidean`\n\n- `verbose = false`\n\n""" -":name" = "AffinityPropagation" -":human_name" = "Affinity Propagation Clustering of data" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:damping, :max_iter, :convergence_iter, :copy, :preference, :affinity, :verbose)`" -":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Int64\", \"Bool\", \"Any\", \"String\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Multiclass}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.MultiTaskLassoCVRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Any\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Any\", \"String\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.MultiTaskLassoCVRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nMultiTaskLassoCVRegressor\n```\n\nA model type for constructing a multi-target lasso regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultiTaskLassoCVRegressor = @load MultiTaskLassoCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = MultiTaskLassoCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`MultiTaskLassoCVRegressor(eps=...)`.\n# Hyper-parameters\n\n- `eps = 0.001`\n\n- `n_alphas = 100`\n\n- `alphas = nothing`\n\n- `fit_intercept = true`\n\n- `max_iter = 300`\n\n- `tol = 0.0001`\n\n- `copy_X = true`\n\n- `cv = 5`\n\n- `verbose = false`\n\n- `n_jobs = 1`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" -":name" = "MultiTaskLassoCVRegressor" +":hyperparameters" = "`(:eps, :n_alphas, :alphas, :fit_intercept, :max_iter, :tol, :copy_X, :cv, :verbose, :n_jobs, :random_state, :selection)`" +":is_pure_julia" = "`false`" ":human_name" = "multi-target lasso regressor with built-in cross-validation" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:eps, :n_alphas, :alphas, :fit_intercept, :max_iter, :tol, :copy_X, :cv, :verbose, :n_jobs, :random_state, :selection)`" -":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Any\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Any\", \"String\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultiTaskLassoCVRegressor\n```\n\nA model type for constructing a multi-target lasso regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultiTaskLassoCVRegressor = @load MultiTaskLassoCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = MultiTaskLassoCVRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`MultiTaskLassoCVRegressor(eps=...)`.\n# Hyper-parameters\n\n- `eps = 0.001`\n\n- `n_alphas = 100`\n\n- `alphas = nothing`\n\n- `fit_intercept = true`\n\n- `max_iter = 300`\n\n- `tol = 0.0001`\n\n- `copy_X = true`\n\n- `cv = 5`\n\n- `verbose = false`\n\n- `n_jobs = 1`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "MultiTaskLassoCVRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.OrthogonalMatchingPursuitRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, Int64}\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.OrthogonalMatchingPursuitRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nOrthogonalMatchingPursuitRegressor\n```\n\nA model type for constructing a orthogonal matching pursuit regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nOrthogonalMatchingPursuitRegressor = @load OrthogonalMatchingPursuitRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = OrthogonalMatchingPursuitRegressor()` to construct an instance with 
default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`OrthogonalMatchingPursuitRegressor(n_nonzero_coefs=...)`.\n# Hyper-parameters\n\n- `n_nonzero_coefs = nothing`\n\n- `tol = nothing`\n\n- `fit_intercept = true`\n\n- `precompute = auto`\n\n""" -":name" = "OrthogonalMatchingPursuitRegressor" +":hyperparameters" = "`(:n_nonzero_coefs, :tol, :fit_intercept, :precompute)`" +":is_pure_julia" = "`false`" ":human_name" = "orthogonal matching pursuit regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:n_nonzero_coefs, :tol, :fit_intercept, :precompute)`" -":hyperparameter_types" = "`(\"Union{Nothing, Int64}\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nOrthogonalMatchingPursuitRegressor\n```\n\nA model type for constructing an orthogonal matching pursuit regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nOrthogonalMatchingPursuitRegressor = @load OrthogonalMatchingPursuitRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = OrthogonalMatchingPursuitRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`OrthogonalMatchingPursuitRegressor(n_nonzero_coefs=...)`.\n# Hyper-parameters\n\n- `n_nonzero_coefs = nothing`\n\n- `tol = nothing`\n\n- `fit_intercept = true`\n\n- `precompute = auto`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "OrthogonalMatchingPursuitRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.BernoulliNBClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Nothing, AbstractVector}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, 
AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.BernoulliNBClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nBernoulliNBClassifier\n```\n\nA model type for constructing a Bernoulli naive Bayes classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBernoulliNBClassifier = @load BernoulliNBClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = BernoulliNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BernoulliNBClassifier(alpha=...)`.\n\nBinomial naive bayes classifier. It is suitable for classification with binary features; features will be binarized based on the `binarize` keyword (unless it's `nothing` in which case the features are assumed to be binary).\n""" -":name" = "BernoulliNBClassifier" +":hyperparameters" = "`(:alpha, :binarize, :fit_prior, :class_prior)`" +":is_pure_julia" = "`false`" ":human_name" = "Bernoulli naive Bayes classifier" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:alpha, :binarize, :fit_prior, :class_prior)`" -":hyperparameter_types" = "`(\"Float64\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Nothing, AbstractVector}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":docstring" = """```\nBernoulliNBClassifier\n```\n\nA model type for constructing a Bernoulli naive Bayes classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBernoulliNBClassifier = @load BernoulliNBClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = BernoulliNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BernoulliNBClassifier(alpha=...)`.\n\nBernoulli naive Bayes classifier. 
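A hedged sketch of the data this model expects (binary Count features, a Multiclass target); everything below is synthetic and illustrative, assuming the packages named in this entry are installed:

```
using MLJ

BernoulliNBClassifier = @load BernoulliNBClassifier pkg=MLJScikitLearnInterface

# integer 0/1 columns have scitype Count, as the input_scitype requires
X = (f1 = rand(0:1, 200), f2 = rand(0:1, 200))
y = coerce(rand(["ham", "spam"], 200), Multiclass)

model = BernoulliNBClassifier(alpha=1.0)  # leave binarize at its default
mach = machine(model, X, y)
fit!(mach)
predict(mach, X)       # probabilistic: UnivariateFinite distributions
predict_mode(mach, X)  # corresponding point predictions
```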
It is suitable for classification with binary features; features will be binarized based on the `binarize` keyword (unless it's `nothing` in which case the features are assumed to be binary).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "BernoulliNBClassifier" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.PassiveAggressiveClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Any\", \"Bool\", \"Any\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.PassiveAggressiveClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:C, :fit_intercept, :max_iter, :tol, :early_stopping, :validation_fraction, :n_iter_no_change, :shuffle, :verbose, :loss, :n_jobs, :random_state, :warm_start, :class_weight, :average)`" +":is_pure_julia" = "`false`" +":human_name" = "passive aggressive classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nPassiveAggressiveClassifier\n```\n\nA model type for constructing a passive aggressive classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nPassiveAggressiveClassifier = @load PassiveAggressiveClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = PassiveAggressiveClassifier()` to construct an instance with default 
hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`PassiveAggressiveClassifier(C=...)`.\n# Hyper-parameters\n\n- `C = 1.0`\n\n- `fit_intercept = true`\n\n- `max_iter = 100`\n\n- `tol = 0.001`\n\n- `early_stopping = false`\n\n- `validation_fraction = 0.1`\n\n- `n_iter_no_change = 5`\n\n- `shuffle = true`\n\n- `verbose = 0`\n\n- `loss = hinge`\n\n- `n_jobs = nothing`\n\n- `random_state = 0`\n\n- `warm_start = false`\n\n- `class_weight = nothing`\n\n- `average = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "PassiveAggressiveClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nPassiveAggressiveClassifier\n```\n\nA model type for constructing a passive aggressive classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nPassiveAggressiveClassifier = @load PassiveAggressiveClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = PassiveAggressiveClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`PassiveAggressiveClassifier(C=...)`.\n# Hyper-parameters\n\n- `C = 1.0`\n\n- `fit_intercept = true`\n\n- `max_iter = 100`\n\n- `tol = 0.001`\n\n- `early_stopping = false`\n\n- `validation_fraction = 0.1`\n\n- `n_iter_no_change = 5`\n\n- `shuffle = true`\n\n- `verbose = 0`\n\n- `loss = hinge`\n\n- `n_jobs = nothing`\n\n- `random_state = 0`\n\n- `warm_start = false`\n\n- `class_weight = nothing`\n\n- `average = false`\n\n""" -":name" = "PassiveAggressiveClassifier" -":human_name" = "passive aggressive classifier" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:C, :fit_intercept, :max_iter, :tol, :early_stopping, :validation_fraction, :n_iter_no_change, :shuffle, :verbose, :loss, :n_jobs, :random_state, :warm_start, :class_weight, :average)`" -":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Any\", \"Bool\", \"Any\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.RidgeCVRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = 
"`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, String}\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.RidgeCVRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nRidgeCVRegressor\n```\n\nA model type for constructing a ridge regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRidgeCVRegressor = @load RidgeCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = RidgeCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`RidgeCVRegressor(alphas=...)`.\n# Hyper-parameters\n\n- `alphas = (0.1, 1.0, 10.0)`\n\n- `fit_intercept = true`\n\n- `scoring = nothing`\n\n- `cv = 5`\n\n- `gcv_mode = nothing`\n\n- `store_cv_values = false`\n\n""" -":name" = "RidgeCVRegressor" +":hyperparameters" = "`(:alphas, :fit_intercept, :scoring, :cv, :gcv_mode, :store_cv_values)`" +":is_pure_julia" = "`false`" ":human_name" = "ridge regressor with built-in cross-validation" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:alphas, :fit_intercept, :scoring, :cv, :gcv_mode, :store_cv_values)`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, String}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nRidgeCVRegressor\n```\n\nA model type for constructing a ridge regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRidgeCVRegressor = @load RidgeCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = RidgeCVRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`RidgeCVRegressor(alphas=...)`.\n# Hyper-parameters\n\n- `alphas = (0.1, 1.0, 10.0)`\n\n- `fit_intercept = true`\n\n- `scoring = nothing`\n\n- `cv = 5`\n\n- `gcv_mode = nothing`\n\n- `store_cv_values = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "RidgeCVRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.SVMRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Any\", \"Int64\", \"Int64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.SVMRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:kernel, :degree, :gamma, :coef0, :tol, :C, :epsilon, :shrinking, :cache_size, :max_iter)`" +":is_pure_julia" = "`false`" +":human_name" = "epsilon-support vector regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSVMRegressor\n```\n\nA model type for constructing an epsilon-support vector regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMRegressor = @load SVMRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMRegressor()` to construct an instance with default hyper-parameters. 
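A minimal usage sketch for this entry, with synthetic data and hyper-parameter values chosen purely for illustration:

```
using MLJ

SVMRegressor = @load SVMRegressor pkg=MLJScikitLearnInterface

X, y = make_regression(100, 3)

# kernel is a String (or Function); "rbf" and gamma="scale" are the defaults
model = SVMRegressor(kernel="rbf", C=1.0, epsilon=0.1)
mach = machine(model, X, y)
fit!(mach)
rms(predict(mach, X), y)  # in-sample RMS error, as a quick sanity check
```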
Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMRegressor(kernel=...)`.\n# Hyper-parameters\n\n- `kernel = rbf`\n\n- `degree = 3`\n\n- `gamma = scale`\n\n- `coef0 = 0.0`\n\n- `tol = 0.001`\n\n- `C = 1.0`\n\n- `epsilon = 0.1`\n\n- `shrinking = true`\n\n- `cache_size = 200`\n\n- `max_iter = -1`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "SVMRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nSVMRegressor\n```\n\nA model type for constructing a epsilon-support vector regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMRegressor = @load SVMRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMRegressor(kernel=...)`.\n# Hyper-parameters\n\n- `kernel = rbf`\n\n- `degree = 3`\n\n- `gamma = scale`\n\n- `coef0 = 0.0`\n\n- `tol = 0.001`\n\n- `C = 1.0`\n\n- `epsilon = 0.1`\n\n- `shrinking = true`\n\n- `cache_size = 200`\n\n- `max_iter = -1`\n\n""" -":name" = "SVMRegressor" -":human_name" = "epsilon-support vector regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :degree, :gamma, :coef0, :tol, :C, :epsilon, :shrinking, :cache_size, :max_iter)`" -":hyperparameter_types" = "`(\"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Any\", \"Int64\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.GaussianNBClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, AbstractVector{Float64}}\", \"Float64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where 
_s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.GaussianNBClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:priors, :var_smoothing)`" +":is_pure_julia" = "`false`" +":human_name" = "Gaussian naive Bayes classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nGaussianNBClassifier\n```\n\nA model type for constructing a Gaussian naive Bayes classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nGaussianNBClassifier = @load GaussianNBClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = GaussianNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`GaussianNBClassifier(priors=...)`.\n# Hyper-parameters\n\n- `priors = nothing`\n\n- `var_smoothing = 1.0e-9`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "GaussianNBClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nGaussianNBClassifier\n```\n\nA model type for constructing a Gaussian naive Bayes classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nGaussianNBClassifier = @load GaussianNBClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = GaussianNBClassifier()` to construct an instance with default hyper-parameters. 
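A short sketch showing one way to estimate this model's out-of-sample log loss with MLJ's resampling machinery; the blob data is synthetic and illustrative:

```
using MLJ

GaussianNBClassifier = @load GaussianNBClassifier pkg=MLJScikitLearnInterface

X, y = make_blobs(300, 2; centers=3)  # Continuous features, Multiclass target

model = GaussianNBClassifier()  # priors=nothing: estimated from y
mach = machine(model, X, y)
evaluate!(mach, resampling=CV(nfolds=5), measure=log_loss)
```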
Provide keyword arguments to override hyper-parameter defaults, as in\n`GaussianNBClassifier(priors=...)`.\n# Hyper-parameters\n\n- `priors = nothing`\n\n- `var_smoothing = 1.0e-9`\n\n""" -":name" = "GaussianNBClassifier" -":human_name" = "Gaussian naive Bayes classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:priors, :var_smoothing)`" -":hyperparameter_types" = "`(\"Union{Nothing, AbstractVector{Float64}}\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.ExtraTreesClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.ExtraTreesClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nExtraTreesClassifier\n```\n\nA model type for constructing a extra trees classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model 
interface.\n\nFrom MLJ, the type can be imported using\n\n```\nExtraTreesClassifier = @load ExtraTreesClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = ExtraTreesClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ExtraTreesClassifier(n_estimators=...)`.\n\nExtra trees classifier, fits a number of randomized decision trees on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting.\n""" -":name" = "ExtraTreesClassifier" +":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :class_weight, :ccp_alpha, :max_samples, :monotonic_cst)`" +":is_pure_julia" = "`false`" ":human_name" = "extra trees classifier" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :class_weight, :ccp_alpha, :max_samples, :monotonic_cst)`" -":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJScikitLearnInterface.KMeans] +":docstring" = """```\nExtraTreesClassifier\n```\n\nA model type for constructing an extra trees classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nExtraTreesClassifier = @load ExtraTreesClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = ExtraTreesClassifier()` to construct an instance with default hyper-parameters. 
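Since this entry declares `reports_feature_importances` as true, a sketch of retrieving importances after a fit (synthetic data, illustrative settings):

```
using MLJ

ExtraTreesClassifier = @load ExtraTreesClassifier pkg=MLJScikitLearnInterface

X, y = make_blobs(200, 4; centers=2)

model = ExtraTreesClassifier(n_estimators=200)
mach = machine(model, X, y)
fit!(mach)
feature_importances(mach)  # one feature => importance pair per column of X
```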
Provide keyword arguments to override hyper-parameter defaults, as in `ExtraTreesClassifier(n_estimators=...)`.\n\nAn extra trees classifier fits a number of randomized decision trees on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "ExtraTreesClassifier" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Multiclass}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[MLJScikitLearnInterface.KMeans] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Int64, String}\", \"Int64\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"String\", \"Union{String, AbstractArray}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "BSD" +":prediction_type" = ":unknown" ":load_path" = "MLJScikitLearnInterface.KMeans" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:n_clusters, :n_init, :max_iter, :tol, :verbose, :random_state, :copy_x, :algorithm, :init)`" +":is_pure_julia" = "`false`" +":human_name" = "k means" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nKMeans\n```\n\nA model type for constructing a k-means model, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKMeans = @load KMeans pkg=MLJScikitLearnInterface\n```\n\nDo `model = KMeans()` to construct an instance with default hyper-parameters. 
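Because this is an unsupervised entry (`target_in_fit` is false), the machine is bound to `X` alone; a hedged sketch with synthetic blobs:

```
using MLJ

KMeans = @load KMeans pkg=MLJScikitLearnInterface

X, _ = make_blobs(300, 2; centers=3)  # labels discarded: unsupervised

model = KMeans(n_clusters=3)
mach = machine(model, X)  # no target supplied
fit!(mach)
transform(mach, X)  # table of distances to each of the 3 centroids
predict(mach, X)    # hard cluster assignments
```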
Provide keyword arguments to override hyper-parameter defaults, as in `KMeans(n_clusters=...)`.\n\nK-Means algorithm: find K centroids corresponding to K clusters in the data.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "KMeans" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nKMeans\n```\n\nA model type for constructing a k means, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKMeans = @load KMeans pkg=MLJScikitLearnInterface\n```\n\nDo `model = KMeans()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KMeans(n_clusters=...)`.\n\nK-Means algorithm: find K centroids corresponding to K clusters in the data.\n""" -":name" = "KMeans" -":human_name" = "k means" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:n_clusters, :n_init, :max_iter, :tol, :verbose, :random_state, :copy_x, :algorithm, :init)`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Int64, String}\", \"Int64\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"String\", \"Union{String, AbstractArray}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Multiclass}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.MultiTaskElasticNetCVRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Int64\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Any\", \"String\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.MultiTaskElasticNetCVRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nMultiTaskElasticNetCVRegressor\n```\n\nA model type for constructing a multi-target elastic net regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultiTaskElasticNetCVRegressor = @load MultiTaskElasticNetCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = MultiTaskElasticNetCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`MultiTaskElasticNetCVRegressor(l1_ratio=...)`.\n# Hyper-parameters\n\n- `l1_ratio = 0.5`\n\n- `eps = 0.001`\n\n- `n_alphas = 100`\n\n- `alphas = nothing`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `cv = 5`\n\n- `copy_X = true`\n\n- `verbose = 0`\n\n- `n_jobs = nothing`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" -":name" = "MultiTaskElasticNetCVRegressor" +":hyperparameters" = "`(:l1_ratio, :eps, :n_alphas, :alphas, :fit_intercept, :max_iter, :tol, :cv, :copy_X, :verbose, :n_jobs, :random_state, :selection)`" +":is_pure_julia" = "`false`" ":human_name" = "multi-target elastic net regressor with built-in cross-validation" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:l1_ratio, :eps, :n_alphas, :alphas, :fit_intercept, :max_iter, :tol, :cv, :copy_X, :verbose, :n_jobs, :random_state, :selection)`" -":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Int64\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Any\", \"String\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultiTaskElasticNetCVRegressor\n```\n\nA model type for constructing a multi-target elastic net regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultiTaskElasticNetCVRegressor = @load MultiTaskElasticNetCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = MultiTaskElasticNetCVRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`MultiTaskElasticNetCVRegressor(l1_ratio=...)`.\n# Hyper-parameters\n\n- `l1_ratio = 0.5`\n\n- `eps = 0.001`\n\n- `n_alphas = 100`\n\n- `alphas = nothing`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `cv = 5`\n\n- `copy_X = true`\n\n- `verbose = 0`\n\n- `n_jobs = nothing`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "MultiTaskElasticNetCVRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.LassoLarsCVRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.LassoLarsCVRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nLassoLarsCVRegressor\n```\n\nA model type for constructing a Lasso model fit with least angle regression (LARS) with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoLarsCVRegressor = @load 
LassoLarsCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoLarsCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoLarsCVRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `max_iter = 500`\n\n- `precompute = auto`\n\n- `cv = 5`\n\n- `max_n_alphas = 1000`\n\n- `n_jobs = nothing`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `positive = false`\n\n""" -":name" = "LassoLarsCVRegressor" +":hyperparameters" = "`(:fit_intercept, :verbose, :max_iter, :precompute, :cv, :max_n_alphas, :n_jobs, :eps, :copy_X, :positive)`" +":is_pure_julia" = "`false`" ":human_name" = "Lasso model fit with least angle regression (LARS) with built-in cross-validation" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:fit_intercept, :verbose, :max_iter, :precompute, :cv, :max_n_alphas, :n_jobs, :eps, :copy_X, :positive)`" -":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nLassoLarsCVRegressor\n```\n\nA model type for constructing a Lasso model fit with least angle regression (LARS) with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoLarsCVRegressor = @load LassoLarsCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoLarsCVRegressor()` to construct an instance with default hyper-parameters. 
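A minimal fit/predict sketch for this entry, under the same installation assumptions as the examples above:

```
using MLJ

LassoLarsCVRegressor = @load LassoLarsCVRegressor pkg=MLJScikitLearnInterface

X, y = make_regression(120, 6)

model = LassoLarsCVRegressor(cv=5, max_n_alphas=1000)
mach = machine(model, X, y)
fit!(mach)
predict(mach, X)
```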
Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoLarsCVRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `max_iter = 500`\n\n- `precompute = auto`\n\n- `cv = 5`\n\n- `max_n_alphas = 1000`\n\n- `n_jobs = nothing`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `positive = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "LassoLarsCVRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.OrthogonalMatchingPursuitCVRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Union{Nothing, Int64}\", \"Union{Bool, Int64}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.OrthogonalMatchingPursuitCVRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nOrthogonalMatchingPursuitCVRegressor\n```\n\nA model type for constructing a orthogonal ,atching pursuit (OMP) model with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nOrthogonalMatchingPursuitCVRegressor = @load OrthogonalMatchingPursuitCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = OrthogonalMatchingPursuitCVRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`OrthogonalMatchingPursuitCVRegressor(copy=...)`.\n# Hyper-parameters\n\n- `copy = true`\n\n- `fit_intercept = true`\n\n- `max_iter = nothing`\n\n- `cv = 5`\n\n- `n_jobs = 1`\n\n- `verbose = false`\n\n""" -":name" = "OrthogonalMatchingPursuitCVRegressor" +":hyperparameters" = "`(:copy, :fit_intercept, :max_iter, :cv, :n_jobs, :verbose)`" +":is_pure_julia" = "`false`" ":human_name" = "orthogonal matching pursuit (OMP) model with built-in cross-validation" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:copy, :fit_intercept, :max_iter, :cv, :n_jobs, :verbose)`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Union{Nothing, Int64}\", \"Union{Bool, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nOrthogonalMatchingPursuitCVRegressor\n```\n\nA model type for constructing an orthogonal matching pursuit (OMP) model with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nOrthogonalMatchingPursuitCVRegressor = @load OrthogonalMatchingPursuitCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = OrthogonalMatchingPursuitCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`OrthogonalMatchingPursuitCVRegressor(copy=...)`.\n# Hyper-parameters\n\n- `copy = true`\n\n- `fit_intercept = true`\n\n- `max_iter = nothing`\n\n- `cv = 5`\n\n- `n_jobs = 1`\n\n- `verbose = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "OrthogonalMatchingPursuitCVRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.AdaBoostClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Float64\", \"String\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.AdaBoostClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nAdaBoostClassifier\n```\n\nA model type for constructing a ada boost classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nAdaBoostClassifier = @load AdaBoostClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = AdaBoostClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `AdaBoostClassifier(estimator=...)`.\n\nAn AdaBoost classifier is a meta-estimator that begins by fitting a classifier on the original dataset and then fits additional copies of the classifier on the same dataset but where the weights of incorrectly classified instances are adjusted such that subsequent classifiers focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost-SAMME.\n""" -":name" = "AdaBoostClassifier" +":hyperparameters" = "`(:estimator, :n_estimators, :learning_rate, :algorithm, :random_state)`" +":is_pure_julia" = "`false`" ":human_name" = "ada boost classifier" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:estimator, :n_estimators, :learning_rate, :algorithm, :random_state)`" -":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Float64\", \"String\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":docstring" = """```\nAdaBoostClassifier\n```\n\nA model type for constructing a ada boost classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nAdaBoostClassifier = @load AdaBoostClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = AdaBoostClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `AdaBoostClassifier(estimator=...)`.\n\nAn AdaBoost classifier is a meta-estimator that begins by fitting a classifier on the original dataset and then fits additional copies of the classifier on the same dataset but where the weights of incorrectly classified instances are adjusted such that subsequent classifiers focus more on difficult cases.\n\nThis class implements the algorithm known as AdaBoost-SAMME.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "AdaBoostClassifier" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.PassiveAggressiveRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Union{Bool, Int64}\", \"String\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.PassiveAggressiveRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nPassiveAggressiveRegressor\n```\n\nA model type for constructing a passive aggressive regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nPassiveAggressiveRegressor = @load PassiveAggressiveRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = 
PassiveAggressiveRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`PassiveAggressiveRegressor(C=...)`.\n# Hyper-parameters\n\n- `C = 1.0`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `early_stopping = false`\n\n- `validation_fraction = 0.1`\n\n- `n_iter_no_change = 5`\n\n- `shuffle = true`\n\n- `verbose = 0`\n\n- `loss = epsilon_insensitive`\n\n- `epsilon = 0.1`\n\n- `random_state = nothing`\n\n- `warm_start = false`\n\n- `average = false`\n\n""" -":name" = "PassiveAggressiveRegressor" +":hyperparameters" = "`(:C, :fit_intercept, :max_iter, :tol, :early_stopping, :validation_fraction, :n_iter_no_change, :shuffle, :verbose, :loss, :epsilon, :random_state, :warm_start, :average)`" +":is_pure_julia" = "`false`" ":human_name" = "passive aggressive regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:C, :fit_intercept, :max_iter, :tol, :early_stopping, :validation_fraction, :n_iter_no_change, :shuffle, :verbose, :loss, :epsilon, :random_state, :warm_start, :average)`" -":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Union{Bool, Int64}\", \"String\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nPassiveAggressiveRegressor\n```\n\nA model type for constructing a passive aggressive regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nPassiveAggressiveRegressor = @load PassiveAggressiveRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = PassiveAggressiveRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`PassiveAggressiveRegressor(C=...)`.\n# Hyper-parameters\n\n- `C = 1.0`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `early_stopping = false`\n\n- `validation_fraction = 0.1`\n\n- `n_iter_no_change = 5`\n\n- `shuffle = true`\n\n- `verbose = 0`\n\n- `loss = epsilon_insensitive`\n\n- `epsilon = 0.1`\n\n- `random_state = nothing`\n\n- `warm_start = false`\n\n- `average = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "PassiveAggressiveRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.BayesianRidgeRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.BayesianRidgeRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nBayesianRidgeRegressor\n```\n\nA model type for constructing a Bayesian ridge regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nBayesianRidgeRegressor = @load BayesianRidgeRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = BayesianRidgeRegressor()` to construct an instance with 
default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`BayesianRidgeRegressor(max_iter=...)`.\n# Hyper-parameters\n\n- `max_iter = 300`\n\n- `tol = 0.001`\n\n- `alpha_1 = 1.0e-6`\n\n- `alpha_2 = 1.0e-6`\n\n- `lambda_1 = 1.0e-6`\n\n- `lambda_2 = 1.0e-6`\n\n- `compute_score = false`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `verbose = false`\n\n""" -":name" = "BayesianRidgeRegressor" +":hyperparameters" = "`(:max_iter, :tol, :alpha_1, :alpha_2, :lambda_1, :lambda_2, :compute_score, :fit_intercept, :copy_X, :verbose)`" +":is_pure_julia" = "`false`" ":human_name" = "Bayesian ridge regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:max_iter, :tol, :alpha_1, :alpha_2, :lambda_1, :lambda_2, :compute_score, :fit_intercept, :copy_X, :verbose)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nBayesianRidgeRegressor\n```\n\nA model type for constructing a Bayesian ridge regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nBayesianRidgeRegressor = @load BayesianRidgeRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = BayesianRidgeRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`BayesianRidgeRegressor(max_iter=...)`.\n# Hyper-parameters\n\n- `max_iter = 300`\n\n- `tol = 0.001`\n\n- `alpha_1 = 1.0e-6`\n\n- `alpha_2 = 1.0e-6`\n\n- `lambda_1 = 1.0e-6`\n\n- `lambda_2 = 1.0e-6`\n\n- `compute_score = false`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `verbose = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "BayesianRidgeRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.GaussianProcessClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Int64\", \"Bool\", \"String\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.GaussianProcessClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:kernel, :optimizer, :n_restarts_optimizer, :copy_X_train, :random_state, :max_iter_predict, :warm_start, :multi_class)`" +":is_pure_julia" = "`false`" +":human_name" = "Gaussian process classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nGaussianProcessClassifier\n```\n\nA model type for constructing a Gaussian process classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nGaussianProcessClassifier = @load GaussianProcessClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = GaussianProcessClassifier()` to construct an instance with default 
hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`GaussianProcessClassifier(kernel=...)`.\n# Hyper-parameters\n\n- `kernel = nothing`\n\n- `optimizer = fmin_l_bfgs_b`\n\n- `n_restarts_optimizer = 0`\n\n- `copy_X_train = true`\n\n- `random_state = nothing`\n\n- `max_iter_predict = 100`\n\n- `warm_start = false`\n\n- `multi_class = one_vs_rest`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "GaussianProcessClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nGaussianProcessClassifier\n```\n\nA model type for constructing a Gaussian process classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nGaussianProcessClassifier = @load GaussianProcessClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = GaussianProcessClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`GaussianProcessClassifier(kernel=...)`.\n# Hyper-parameters\n\n- `kernel = nothing`\n\n- `optimizer = fmin_l_bfgs_b`\n\n- `n_restarts_optimizer = 0`\n\n- `copy_X_train = true`\n\n- `random_state = nothing`\n\n- `max_iter_predict = 100`\n\n- `warm_start = false`\n\n- `multi_class = one_vs_rest`\n\n""" -":name" = "GaussianProcessClassifier" -":human_name" = "Gaussian process classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :optimizer, :n_restarts_optimizer, :copy_X_train, :random_state, :max_iter_predict, :warm_start, :multi_class)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Int64\", \"Bool\", \"String\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.BaggingClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.BaggingClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:estimator, :n_estimators, :max_samples, :max_features, :bootstrap, :bootstrap_features, :oob_score, :warm_start, :n_jobs, :random_state, :verbose)`" +":is_pure_julia" = "`false`" +":human_name" = "bagging ensemble classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nBaggingClassifier\n```\n\nA model type for constructing a bagging ensemble classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBaggingClassifier = @load BaggingClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = BaggingClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BaggingClassifier(estimator=...)`.\n\nA Bagging classifier is an ensemble meta-estimator that fits base classifiers each on random subsets of the original dataset and then aggregates their individual predictions (either by voting or by averaging) to form a final prediction. Such a meta-estimator can typically be used as a way to reduce the variance of a black-box estimator (e.g., a decision tree), by introducing randomization into its construction procedure and then making an ensemble out of it.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "BaggingClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nBaggingClassifier\n```\n\nA model type for constructing a bagging ensemble classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBaggingClassifier = @load BaggingClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = BaggingClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BaggingClassifier(estimator=...)`.\n\nA Bagging classifier is an ensemble meta-estimator that fits base classifiers each on random subsets of the original dataset and then aggregate their individual predictions (either by voting or by averaging) to form a final prediction. 
Such a meta-estimator can typically be used as a way to reduce the variance of a black-box estimator (e.g., a decision tree), by introducing randomization into its construction procedure and then making an ensemble out of it.\n""" -":name" = "BaggingClassifier" -":human_name" = "bagging ensemble classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:estimator, :n_estimators, :max_samples, :max_features, :bootstrap, :bootstrap_features, :oob_score, :warm_start, :n_jobs, :random_state, :verbose)`" -":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.OPTICS] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Float64, Int64}\", \"Float64\", \"String\", \"Int64\", \"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Bool\", \"Union{Nothing, Float64, Int64}\", \"String\", \"Int64\", \"Union{Nothing, Int64}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "BSD" +":prediction_type" = ":unknown" ":load_path" = "MLJScikitLearnInterface.OPTICS" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:min_samples, :max_eps, :metric, :p, :cluster_method, :eps, :xi, :predecessor_correction, :min_cluster_size, :algorithm, :leaf_size, :n_jobs)`" +":is_pure_julia" = "`false`" +":human_name" = "optics" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nOPTICS\n```\n\nA model type for constructing an optics model, based on 
[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOPTICS = @load OPTICS pkg=MLJScikitLearnInterface\n```\n\nDo `model = OPTICS()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OPTICS(min_samples=...)`.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely related to [`DBSCAN`](@ref), finds core samples of high density and expands clusters from them. Unlike DBSCAN, it keeps the cluster hierarchy for a variable neighborhood radius. Better suited for usage on large datasets than the current sklearn implementation of DBSCAN.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "OPTICS" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nOPTICS\n```\n\nA model type for constructing a optics, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOPTICS = @load OPTICS pkg=MLJScikitLearnInterface\n```\n\nDo `model = OPTICS()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OPTICS(min_samples=...)`.\n\nOPTICS (Ordering Points To Identify the Clustering Structure), closely related to [`DBSCAN'](@ref), finds core sample of high density and expands clusters from them. Unlike DBSCAN, keeps cluster hierarchy for a variable neighborhood radius. 
Better suited for usage on large datasets than the current sklearn implementation of DBSCAN.\n""" -":name" = "OPTICS" -":human_name" = "optics" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params"] -":hyperparameters" = "`(:min_samples, :max_eps, :metric, :p, :cluster_method, :eps, :xi, :predecessor_correction, :min_cluster_size, :algorithm, :leaf_size, :n_jobs)`" -":hyperparameter_types" = "`(\"Union{Float64, Int64}\", \"Float64\", \"String\", \"Int64\", \"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Bool\", \"Union{Nothing, Float64, Int64}\", \"String\", \"Int64\", \"Union{Nothing, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.RANSACRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Union{Float64, Int64}\", \"Union{Nothing, Float64}\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Function, String}\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.RANSACRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:estimator, :min_samples, :residual_threshold, :is_data_valid, :is_model_valid, :max_trials, :max_skips, :stop_n_inliers, :stop_score, :stop_probability, :loss, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "ransac regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nRANSACRegressor\n```\n\nA model type for constructing a ransac regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be 
imported using\n```\nRANSACRegressor = @load RANSACRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = RANSACRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`RANSACRegressor(estimator=...)`.\n# Hyper-parameters\n\n- `estimator = nothing`\n\n- `min_samples = 5`\n\n- `residual_threshold = nothing`\n\n- `is_data_valid = nothing`\n\n- `is_model_valid = nothing`\n\n- `max_trials = 100`\n\n- `max_skips = 9223372036854775807`\n\n- `stop_n_inliers = 9223372036854775807`\n\n- `stop_score = Inf`\n\n- `stop_probability = 0.99`\n\n- `loss = absolute_error`\n\n- `random_state = nothing`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "RANSACRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nRANSACRegressor\n```\n\nA model type for constructing a ransac regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nRANSACRegressor = @load RANSACRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = RANSACRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`RANSACRegressor(estimator=...)`.\n# Hyper-parameters\n\n- `estimator = nothing`\n\n- `min_samples = 5`\n\n- `residual_threshold = nothing`\n\n- `is_data_valid = nothing`\n\n- `is_model_valid = nothing`\n\n- `max_trials = 100`\n\n- `max_skips = 9223372036854775807`\n\n- `stop_n_inliers = 9223372036854775807`\n\n- `stop_score = Inf`\n\n- `stop_probability = 0.99`\n\n- `loss = absolute_error`\n\n- `random_state = nothing`\n\n""" -":name" = "RANSACRegressor" -":human_name" = "ransac regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:estimator, :min_samples, :residual_threshold, :is_data_valid, :is_model_valid, :max_trials, :max_skips, :stop_n_inliers, :stop_score, :stop_probability, :loss, :random_state)`" -":hyperparameter_types" = "`(\"Any\", \"Union{Float64, Int64}\", \"Union{Nothing, Float64}\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Function, String}\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.KNeighborsRegressor] -":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Function, String}\", \"String\", \"Int64\", \"Int64\", \"Any\", \"Any\", \"Union{Nothing, Int64}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.KNeighborsRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:n_neighbors, :weights, :algorithm, :leaf_size, :p, :metric, :metric_params, :n_jobs)`" +":is_pure_julia" = "`false`" +":human_name" = "K-nearest neighbors regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nKNeighborsRegressor\n```\n\nA model type for constructing a K-nearest neighbors regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nKNeighborsRegressor = @load KNeighborsRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = KNeighborsRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`KNeighborsRegressor(n_neighbors=...)`.\n# Hyper-parameters\n\n- `n_neighbors = 5`\n\n- `weights = uniform`\n\n- `algorithm = auto`\n\n- `leaf_size = 30`\n\n- `p = 2`\n\n- `metric = minkowski`\n\n- `metric_params = nothing`\n\n- `n_jobs = nothing`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "KNeighborsRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nKNeighborsRegressor\n```\n\nA model type for constructing a K-nearest neighbors regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nKNeighborsRegressor = @load KNeighborsRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = KNeighborsRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`KNeighborsRegressor(n_neighbors=...)`.\n# Hyper-parameters\n\n- `n_neighbors = 5`\n\n- `weights = uniform`\n\n- `algorithm = auto`\n\n- `leaf_size = 30`\n\n- `p = 2`\n\n- `metric = minkowski`\n\n- `metric_params = nothing`\n\n- `n_jobs = nothing`\n\n""" -":name" = "KNeighborsRegressor" -":human_name" = "K-nearest neighbors regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:n_neighbors, :weights, :algorithm, :leaf_size, :p, :metric, :metric_params, :n_jobs)`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Function, String}\", \"String\", \"Int64\", \"Int64\", \"Any\", \"Any\", \"Union{Nothing, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.HistGradientBoostingRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Any\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.HistGradientBoostingRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:loss, :quantile, :learning_rate, :max_iter, :max_leaf_nodes, :max_depth, :min_samples_leaf, :l2_regularization, :max_bins, :categorical_features, 
:monotonic_cst, :interaction_cst, :warm_start, :early_stopping, :scoring, :validation_fraction, :n_iter_no_change, :tol, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "gradient boosting ensemble regression" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nHistGradientBoostingRegressor\n```\n\nA model type for constructing a gradient boosting ensemble regression, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nHistGradientBoostingRegressor = @load HistGradientBoostingRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = HistGradientBoostingRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `HistGradientBoostingRegressor(loss=...)`.\n\nThis estimator builds an additive model in a forward stage-wise fashion; it allows for the optimization of arbitrary differentiable loss functions. In each stage a regression tree is fit on the negative gradient of the given loss function.\n\n[`HistGradientBoostingRegressor`](@ref) is a much faster variant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "HistGradientBoostingRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nHistGradientBoostingRegressor\n```\n\nA model type for constructing a gradient boosting ensemble regression, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nHistGradientBoostingRegressor = @load HistGradientBoostingRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = HistGradientBoostingRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `HistGradientBoostingRegressor(loss=...)`.\n\nThis estimator builds an additive model in a forward stage-wise fashion; it allows for the optimization of arbitrary differentiable loss functions. 
In each stage a regression tree is fit on the negative gradient of the given loss function.\n\n[`HistGradientBoostingRegressor`](@ref) is a much faster variant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n""" -":name" = "HistGradientBoostingRegressor" -":human_name" = "gradient boosting ensemble regression" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:loss, :quantile, :learning_rate, :max_iter, :max_leaf_nodes, :max_depth, :min_samples_leaf, :l2_regularization, :max_bins, :categorical_features, :monotonic_cst, :interaction_cst, :warm_start, :early_stopping, :scoring, :validation_fraction, :n_iter_no_change, :tol, :random_state)`" -":hyperparameter_types" = "`(\"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Any\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.MiniBatchKMeans] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Multiclass}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Int64, String}\", \"Union{String, AbstractArray}\", \"Float64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "BSD" 
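The scikit-learn regressor entries above (`KNeighborsRegressor`, `HistGradientBoostingRegressor`, and the rest) all advertise the same deterministic workflow through their docstrings and traits. As a minimal sketch of that workflow, assuming MLJ and MLJScikitLearnInterface (plus the Python scikit-learn dependency it wraps) are installed, and with synthetic `make_regression` data and the `n_neighbors=7` override standing in for real choices:

```julia
using MLJ  # assumes MLJ and MLJScikitLearnInterface are in the active environment

# Load the model type registered above; `pkg=...` disambiguates duplicate names:
KNeighborsRegressor = @load KNeighborsRegressor pkg=MLJScikitLearnInterface verbosity=0

# Synthetic data matching the advertised scitypes (Continuous table, Continuous target):
X, y = make_regression(100, 3)

model = KNeighborsRegressor(n_neighbors=7)    # override one hyper-parameter default
mach = machine(model, X, y)                   # bind the model to data
fit!(mach, rows=1:80)                         # train on the first 80 rows
yhat = predict(mach, selectrows(X, 81:100))   # deterministic Continuous predictions
```

Here `predict` returns raw values rather than distributions, matching the `:prediction_type = :deterministic` trait recorded for these entries.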
+":prediction_type" = ":unknown" ":load_path" = "MLJScikitLearnInterface.MiniBatchKMeans" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:n_clusters, :max_iter, :batch_size, :verbose, :compute_labels, :random_state, :tol, :max_no_improvement, :init_size, :n_init, :init, :reassignment_ratio)`" +":is_pure_julia" = "`false`" +":human_name" = "Mini-Batch K-Means clustering" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMiniBatchKMeans\n```\n\nA model type for constructing a Mini-Batch K-Means clustering model, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMiniBatchKMeans = @load MiniBatchKMeans pkg=MLJScikitLearnInterface\n```\n\nDo `model = MiniBatchKMeans()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`MiniBatchKMeans(n_clusters=...)`.\n# Hyper-parameters\n\n- `n_clusters = 8`\n\n- `max_iter = 100`\n\n- `batch_size = 100`\n\n- `verbose = 0`\n\n- `compute_labels = true`\n\n- `random_state = nothing`\n\n- `tol = 0.0`\n\n- `max_no_improvement = 10`\n\n- `init_size = nothing`\n\n- `n_init = 3`\n\n- `init = k-means++`\n\n- `reassignment_ratio = 0.01`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "MiniBatchKMeans" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMiniBatchKMeans\n```\n\nA model type for constructing a Mini-Batch K-Means clustering., based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMiniBatchKMeans = @load MiniBatchKMeans pkg=MLJScikitLearnInterface\n```\n\nDo `model = MiniBatchKMeans()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`MiniBatchKMeans(n_clusters=...)`.\n# Hyper-parameters\n\n- `n_clusters = 8`\n\n- `max_iter = 100`\n\n- `batch_size = 100`\n\n- `verbose = 0`\n\n- `compute_labels = true`\n\n- `random_state = nothing`\n\n- `tol = 0.0`\n\n- `max_no_improvement = 10`\n\n- `init_size = nothing`\n\n- `n_init = 3`\n\n- `init = k-means++`\n\n- `reassignment_ratio = 0.01`\n\n""" -":name" = "MiniBatchKMeans" -":human_name" = "Mini-Batch K-Means clustering." 
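For unsupervised entries such as `MiniBatchKMeans`, the machine binds to `X` alone, and the traits recorded above (`:implemented_methods`, `:target_scitype`, `:transform_scitype`) indicate that both `predict` (cluster labels) and `transform` (cluster distances) are available. A hedged sketch under the same installation assumptions, with `n_clusters=3` and `batch_size=50` chosen purely for illustration:

```julia
using MLJ

MiniBatchKMeans = @load MiniBatchKMeans pkg=MLJScikitLearnInterface verbosity=0

X, _ = make_blobs(500, 2; centers=3)   # Continuous table; the blob labels are discarded

model = MiniBatchKMeans(n_clusters=3, batch_size=50)
mach = machine(model, X)               # unsupervised: no target in the machine
fit!(mach)

labels = predict(mach, X)    # cluster assignments, Multiclass per :target_scitype
W = transform(mach, X)       # distances to the cluster centers, a Continuous table
```

The same pattern applies to the other `MLJModelInterface.Unsupervised` entries in this hunk, modulo which of `predict` and `transform` each one lists under `:implemented_methods`.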
-":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:n_clusters, :max_iter, :batch_size, :verbose, :compute_labels, :random_state, :tol, :max_no_improvement, :init_size, :n_init, :init, :reassignment_ratio)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Int64, String}\", \"Union{String, AbstractArray}\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Multiclass}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.LassoCVRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Any\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Bool\", \"Any\", \"String\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.LassoCVRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nLassoCVRegressor\n```\n\nA model type for constructing a lasso regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoCVRegressor = @load LassoCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = 
LassoCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoCVRegressor(eps=...)`.\n# Hyper-parameters\n\n- `eps = 0.001`\n\n- `n_alphas = 100`\n\n- `alphas = nothing`\n\n- `fit_intercept = true`\n\n- `precompute = auto`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `copy_X = true`\n\n- `cv = 5`\n\n- `verbose = false`\n\n- `n_jobs = nothing`\n\n- `positive = false`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" -":name" = "LassoCVRegressor" +":hyperparameters" = "`(:eps, :n_alphas, :alphas, :fit_intercept, :precompute, :max_iter, :tol, :copy_X, :cv, :verbose, :n_jobs, :positive, :random_state, :selection)`" +":is_pure_julia" = "`false`" ":human_name" = "lasso regressor with built-in cross-validation" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:eps, :n_alphas, :alphas, :fit_intercept, :precompute, :max_iter, :tol, :copy_X, :cv, :verbose, :n_jobs, :positive, :random_state, :selection)`" -":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Any\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Bool\", \"Any\", \"String\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" +":docstring" = """```\nLassoCVRegressor\n```\n\nA model type for constructing a lasso regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoCVRegressor = @load LassoCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoCVRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoCVRegressor(eps=...)`.\n# Hyper-parameters\n\n- `eps = 0.001`\n\n- `n_alphas = 100`\n\n- `alphas = nothing`\n\n- `fit_intercept = true`\n\n- `precompute = auto`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `copy_X = true`\n\n- `cv = 5`\n\n- `verbose = false`\n\n- `n_jobs = nothing`\n\n- `positive = false`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "LassoCVRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.DummyRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"Any\", \"Float64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.DummyRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:strategy, :constant, :quantile)`" +":is_pure_julia" = "`false`" +":human_name" = "dummy regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nDummyRegressor\n```\n\nA model type for constructing a dummy regressor, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDummyRegressor = @load DummyRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = DummyRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `DummyRegressor(strategy=...)`.\n\nDummyRegressor is a regressor that makes predictions using simple rules.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "DummyRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nDummyRegressor\n```\n\nA model type for constructing a dummy regressor, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDummyRegressor = @load DummyRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = DummyRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DummyRegressor(strategy=...)`.\n\nDummyRegressor is a regressor that makes predictions using simple rules.\n""" -":name" = "DummyRegressor" -":human_name" = "dummy regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:strategy, :constant, :quantile)`" -":hyperparameter_types" = "`(\"String\", \"Any\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.BisectingKMeans] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Multiclass}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"String\", \"Union{String, AbstractArray}\", \"String\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" 
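The BisectingKMeans entry being reordered here is registered as an `Unsupervised` transformer (its `implemented_methods` list `:transform` but no `:predict`). For orientation, a minimal, hypothetical sketch of how such a registry entry is consumed from MLJ; the data is synthetic and the hyper-parameter choice is illustrative only:

```julia
using MLJ  # assumes MLJ and MLJScikitLearnInterface are in the active environment

# Load the model type via the entry's load_path/package_name metadata:
BisectingKMeans = @load BisectingKMeans pkg=MLJScikitLearnInterface

model = BisectingKMeans(n_clusters=3)  # override a default hyper-parameter
X = MLJ.table(rand(100, 4))            # any table of Continuous features (input_scitype)
mach = machine(model, X) |> fit!       # unsupervised: no target y is bound
Xout = transform(mach, X)              # output matches the declared transform_scitype
```

The scitype fields in each entry (`input_scitype`, `transform_scitype`, and friends) are what MLJ's model search and data checks consult; the sketch above is only meant to make those declarations concrete.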
+":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "BSD" +":prediction_type" = ":unknown" ":load_path" = "MLJScikitLearnInterface.BisectingKMeans" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:n_clusters, :n_init, :max_iter, :tol, :verbose, :random_state, :copy_x, :algorithm, :init, :bisecting_strategy)`" +":is_pure_julia" = "`false`" +":human_name" = "bisecting k means" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nBisectingKMeans\n```\n\nA model type for constructing a bisecting k means, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBisectingKMeans = @load BisectingKMeans pkg=MLJScikitLearnInterface\n```\n\nDo `model = BisectingKMeans()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BisectingKMeans(n_clusters=...)`.\n\nBisecting K-Means clustering.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "BisectingKMeans" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nBisectingKMeans\n```\n\nA model type for constructing a bisecting k means, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBisectingKMeans = @load BisectingKMeans pkg=MLJScikitLearnInterface\n```\n\nDo `model = BisectingKMeans()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `BisectingKMeans(n_clusters=...)`.\n\nBisecting K-Means clustering.\n""" -":name" = "BisectingKMeans" -":human_name" = "bisecting k means" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:n_clusters, :n_init, :max_iter, :tol, :verbose, :random_state, :copy_x, :algorithm, :init, :bisecting_strategy)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"String\", \"Union{String, AbstractArray}\", \"String\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Multiclass}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.LassoLarsRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.LassoLarsRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nLassoLarsRegressor\n```\n\nA model type for constructing a Lasso model fit with least angle regression (LARS), based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoLarsRegressor = @load LassoLarsRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = 
LassoLarsRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoLarsRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `precompute = auto`\n\n- `max_iter = 500`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `fit_path = true`\n\n- `positive = false`\n\n""" -":name" = "LassoLarsRegressor" +":hyperparameters" = "`(:alpha, :fit_intercept, :verbose, :precompute, :max_iter, :eps, :copy_X, :fit_path, :positive)`" +":is_pure_julia" = "`false`" ":human_name" = "Lasso model fit with least angle regression (LARS)" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:alpha, :fit_intercept, :verbose, :precompute, :max_iter, :eps, :copy_X, :fit_path, :positive)`" -":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nLassoLarsRegressor\n```\n\nA model type for constructing a Lasso model fit with least angle regression (LARS), based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoLarsRegressor = @load LassoLarsRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoLarsRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoLarsRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `precompute = auto`\n\n- `max_iter = 500`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n- `fit_path = true`\n\n- `positive = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "LassoLarsRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.LarsCVRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.LarsCVRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nLarsCVRegressor\n```\n\nA model type for constructing a least angle regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLarsCVRegressor = @load LarsCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LarsCVRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`LarsCVRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `max_iter = 500`\n\n- `precompute = auto`\n\n- `cv = 5`\n\n- `max_n_alphas = 1000`\n\n- `n_jobs = nothing`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n""" -":name" = "LarsCVRegressor" +":hyperparameters" = "`(:fit_intercept, :verbose, :max_iter, :precompute, :cv, :max_n_alphas, :n_jobs, :eps, :copy_X)`" +":is_pure_julia" = "`false`" ":human_name" = "least angle regressor with built-in cross-validation" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:fit_intercept, :verbose, :max_iter, :precompute, :cv, :max_n_alphas, :n_jobs, :eps, :copy_X)`" -":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nLarsCVRegressor\n```\n\nA model type for constructing a least angle regressor with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLarsCVRegressor = @load LarsCVRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LarsCVRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LarsCVRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `verbose = false`\n\n- `max_iter = 500`\n\n- `precompute = auto`\n\n- `cv = 5`\n\n- `max_n_alphas = 1000`\n\n- `n_jobs = nothing`\n\n- `eps = 2.220446049250313e-16`\n\n- `copy_X = true`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "LarsCVRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.KNeighborsClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Function, String}\", \"String\", \"Int64\", \"Int64\", \"Any\", \"Any\", \"Union{Nothing, Int64}\")`" +":package_uuid" 
= "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.KNeighborsClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:n_neighbors, :weights, :algorithm, :leaf_size, :p, :metric, :metric_params, :n_jobs)`" +":is_pure_julia" = "`false`" +":human_name" = "K-nearest neighbors classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nKNeighborsClassifier\n```\n\nA model type for constructing a K-nearest neighbors classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nKNeighborsClassifier = @load KNeighborsClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = KNeighborsClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`KNeighborsClassifier(n_neighbors=...)`.\n# Hyper-parameters\n\n- `n_neighbors = 5`\n\n- `weights = uniform`\n\n- `algorithm = auto`\n\n- `leaf_size = 30`\n\n- `p = 2`\n\n- `metric = minkowski`\n\n- `metric_params = nothing`\n\n- `n_jobs = nothing`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "KNeighborsClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nKNeighborsClassifier\n```\n\nA model type for constructing a K-nearest neighbors classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nKNeighborsClassifier = @load KNeighborsClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = KNeighborsClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`KNeighborsClassifier(n_neighbors=...)`.\n# Hyper-parameters\n\n- `n_neighbors = 5`\n\n- `weights = uniform`\n\n- `algorithm = auto`\n\n- `leaf_size = 30`\n\n- `p = 2`\n\n- `metric = minkowski`\n\n- `metric_params = nothing`\n\n- `n_jobs = nothing`\n\n""" -":name" = "KNeighborsClassifier" -":human_name" = "K-nearest neighbors classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:n_neighbors, :weights, :algorithm, :leaf_size, :p, :metric, :metric_params, :n_jobs)`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Function, String}\", \"String\", \"Int64\", \"Int64\", \"Any\", \"Any\", \"Union{Nothing, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.SVMLinearClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"String\", \"Bool\", \"Float64\", \"Float64\", \"String\", \"Bool\", \"Float64\", \"Any\", \"Int64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.SVMLinearClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:penalty, :loss, :dual, :tol, :C, :multi_class, :fit_intercept, :intercept_scaling, :random_state, :max_iter)`" +":is_pure_julia" = "`false`" +":human_name" = "linear support vector classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSVMLinearClassifier\n```\n\nA model type for constructing a linear support vector classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the 
MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMLinearClassifier = @load SVMLinearClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMLinearClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMLinearClassifier(penalty=...)`.\n# Hyper-parameters\n\n- `penalty = l2`\n\n- `loss = squared_hinge`\n\n- `dual = true`\n\n- `tol = 0.0001`\n\n- `C = 1.0`\n\n- `multi_class = ovr`\n\n- `fit_intercept = true`\n\n- `intercept_scaling = 1.0`\n\n- `random_state = nothing`\n\n- `max_iter = 1000`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "SVMLinearClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nSVMLinearClassifier\n```\n\nA model type for constructing a linear support vector classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMLinearClassifier = @load SVMLinearClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMLinearClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMLinearClassifier(penalty=...)`.\n# Hyper-parameters\n\n- `penalty = l2`\n\n- `loss = squared_hinge`\n\n- `dual = true`\n\n- `tol = 0.0001`\n\n- `C = 1.0`\n\n- `multi_class = ovr`\n\n- `fit_intercept = true`\n\n- `intercept_scaling = 1.0`\n\n- `random_state = nothing`\n\n- `max_iter = 1000`\n\n""" -":name" = "SVMLinearClassifier" -":human_name" = "linear support vector classifier" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:penalty, :loss, :dual, :tol, :C, :multi_class, :fit_intercept, :intercept_scaling, :random_state, :max_iter)`" -":hyperparameter_types" = "`(\"String\", \"String\", \"Bool\", \"Float64\", \"Float64\", \"String\", \"Bool\", \"Float64\", \"Any\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.FeatureAgglomeration] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Any\", \"Any\", \"Any\", 
\"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "BSD" +":prediction_type" = ":unknown" ":load_path" = "MLJScikitLearnInterface.FeatureAgglomeration" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:n_clusters, :memory, :connectivity, :metric, :compute_full_tree, :linkage, :distance_threshold)`" +":is_pure_julia" = "`false`" +":human_name" = "feature agglomeration" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nFeatureAgglomeration\n```\n\nA model type for constructing a feature agglomeration, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFeatureAgglomeration = @load FeatureAgglomeration pkg=MLJScikitLearnInterface\n```\n\nDo `model = FeatureAgglomeration()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FeatureAgglomeration(n_clusters=...)`.\n\nSimilar to [`AgglomerativeClustering`](@ref), but recursively merges features instead of samples.\"\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "FeatureAgglomeration" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nFeatureAgglomeration\n```\n\nA model type for constructing a feature agglomeration, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFeatureAgglomeration = @load FeatureAgglomeration pkg=MLJScikitLearnInterface\n```\n\nDo `model = FeatureAgglomeration()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `FeatureAgglomeration(n_clusters=...)`.\n\nSimilar to [`AgglomerativeClustering`](@ref), but recursively merges features instead of samples.\"\n""" -":name" = "FeatureAgglomeration" -":human_name" = "feature agglomeration" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] -":hyperparameters" = "`(:n_clusters, :memory, :connectivity, :metric, :compute_full_tree, :linkage, :distance_threshold)`" -":hyperparameter_types" = "`(\"Int64\", \"Any\", \"Any\", \"Any\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.DummyClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"Any\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.DummyClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:strategy, :constant, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "dummy classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nDummyClassifier\n```\n\nA model type for constructing a dummy classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDummyClassifier = @load DummyClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = DummyClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `DummyClassifier(strategy=...)`.\n\nDummyClassifier is a classifier that makes predictions using simple rules.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "DummyClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nDummyClassifier\n```\n\nA model type for constructing a dummy classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDummyClassifier = @load DummyClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = DummyClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DummyClassifier(strategy=...)`.\n\nDummyClassifier is a classifier that makes predictions using simple rules.\n""" -":name" = "DummyClassifier" -":human_name" = "dummy classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:strategy, :constant, :random_state)`" -":hyperparameter_types" = "`(\"String\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.BaggingRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = 
"`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.BaggingRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:estimator, :n_estimators, :max_samples, :max_features, :bootstrap, :bootstrap_features, :oob_score, :warm_start, :n_jobs, :random_state, :verbose)`" +":is_pure_julia" = "`false`" +":human_name" = "bagging ensemble regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nBaggingRegressor\n```\n\nA model type for constructing a bagging ensemble regressor, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBaggingRegressor = @load BaggingRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = BaggingRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BaggingRegressor(estimator=...)`.\n\nA Bagging regressor is an ensemble meta-estimator that fits base regressors each on random subsets of the original dataset and then aggregate their individual predictions (either by voting or by averaging) to form a final prediction. Such a meta-estimator can typically be used as a way to reduce the variance of a black-box estimator (e.g., a decision tree), by introducing randomization into its construction procedure and then making an ensemble out of it.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "BaggingRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nBaggingRegressor\n```\n\nA model type for constructing a bagging ensemble regressor, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBaggingRegressor = @load BaggingRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = BaggingRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BaggingRegressor(estimator=...)`.\n\nA Bagging regressor is an ensemble meta-estimator that fits base regressors each on random subsets of the original dataset and then aggregate their individual predictions (either by voting or by averaging) to form a final prediction. 
Such a meta-estimator can typically be used as a way to reduce the variance of a black-box estimator (e.g., a decision tree), by introducing randomization into its construction procedure and then making an ensemble out of it.\n""" -":name" = "BaggingRegressor" -":human_name" = "bagging ensemble regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:estimator, :n_estimators, :max_samples, :max_features, :bootstrap, :bootstrap_features, :oob_score, :warm_start, :n_jobs, :random_state, :verbose)`" -":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.BayesianQDA] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, AbstractVector}\", \"Float64\", \"Bool\", \"Float64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.BayesianQDA" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:priors, :reg_param, :store_covariance, :tol)`" +":is_pure_julia" = "`false`" +":human_name" = "Bayesian quadratic discriminant analysis" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nBayesianQDA\n```\n\nA model type for constructing a Bayesian quadratic discriminant analysis, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nBayesianQDA = @load BayesianQDA 
pkg=MLJScikitLearnInterface\n```\n\nDo `model = BayesianQDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`BayesianQDA(priors=...)`.\n# Hyper-parameters\n\n- `priors = nothing`\n\n- `reg_param = 0.0`\n\n- `store_covariance = false`\n\n- `tol = 0.0001`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "BayesianQDA" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nBayesianQDA\n```\n\nA model type for constructing a Bayesian quadratic discriminant analysis, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nBayesianQDA = @load BayesianQDA pkg=MLJScikitLearnInterface\n```\n\nDo `model = BayesianQDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`BayesianQDA(priors=...)`.\n# Hyper-parameters\n\n- `priors = nothing`\n\n- `reg_param = 0.0`\n\n- `store_covariance = false`\n\n- `tol = 0.0001`\n\n""" -":name" = "BayesianQDA" -":human_name" = "Bayesian quadratic discriminant analysis" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:priors, :reg_param, :store_covariance, :tol)`" -":hyperparameter_types" = "`(\"Union{Nothing, AbstractVector}\", \"Float64\", \"Bool\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJScikitLearnInterface.BayesianLDA] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":is_wrapper" = "`false`" + +[MLJScikitLearnInterface.BayesianLDA] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"Union{Nothing, Float64, String}\", \"Union{Nothing, AbstractVector}\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Any\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" 
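The BayesianQDA entry just above declares `prediction_type = :probabilistic` with a `predict_scitype` of `Density{<:Finite}` vectors, so `predict` returns per-observation class distributions rather than labels. A hedged sketch of that contract (the dataset and hyper-parameter value here are illustrative, not from this registry):

```julia
using MLJ  # assumes MLJ and MLJScikitLearnInterface are installed

BayesianQDA = @load BayesianQDA pkg=MLJScikitLearnInterface
model = BayesianQDA(reg_param=0.1)  # override one default hyper-parameter

X, y = @load_iris                   # Continuous features, Multiclass target
mach = machine(model, X, y) |> fit!

yprob = predict(mach, X)            # UnivariateFinite distributions (Density{<:Finite})
yhat  = predict_mode(mach, X)       # collapse each distribution to a point label
```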
+":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.BayesianLDA" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nBayesianLDA\n```\n\nA model type for constructing a Bayesian linear discriminant analysis, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nBayesianLDA = @load BayesianLDA pkg=MLJScikitLearnInterface\n```\n\nDo `model = BayesianLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`BayesianLDA(solver=...)`.\n# Hyper-parameters\n\n- `solver = svd`\n\n- `shrinkage = nothing`\n\n- `priors = nothing`\n\n- `n_components = nothing`\n\n- `store_covariance = false`\n\n- `tol = 0.0001`\n\n- `covariance_estimator = nothing`\n\n""" -":name" = "BayesianLDA" +":hyperparameters" = "`(:solver, :shrinkage, :priors, :n_components, :store_covariance, :tol, :covariance_estimator)`" +":is_pure_julia" = "`false`" ":human_name" = "Bayesian linear discriminant analysis" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:solver, :shrinkage, :priors, :n_components, :store_covariance, :tol, :covariance_estimator)`" -":hyperparameter_types" = "`(\"String\", \"Union{Nothing, Float64, String}\", \"Union{Nothing, AbstractVector}\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nBayesianLDA\n```\n\nA model type for constructing a Bayesian linear discriminant analysis, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nBayesianLDA = @load BayesianLDA pkg=MLJScikitLearnInterface\n```\n\nDo `model = BayesianLDA()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`BayesianLDA(solver=...)`.\n# Hyper-parameters\n\n- `solver = svd`\n\n- `shrinkage = nothing`\n\n- `priors = nothing`\n\n- `n_components = nothing`\n\n- `store_covariance = false`\n\n- `tol = 0.0001`\n\n- `covariance_estimator = nothing`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "BayesianLDA" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.SGDClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.SGDClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:loss, :penalty, :alpha, :l1_ratio, :fit_intercept, :max_iter, :tol, :shuffle, :verbose, :epsilon, :n_jobs, :random_state, :learning_rate, :eta0, :power_t, :early_stopping, :validation_fraction, :n_iter_no_change, :class_weight, :warm_start, :average)`" +":is_pure_julia" = "`false`" +":human_name" = "sgd classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSGDClassifier\n```\n\nA model type for constructing an SGD classifier, based 
on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSGDClassifier = @load SGDClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = SGDClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SGDClassifier(loss=...)`.\n# Hyper-parameters\n\n- `loss = hinge`\n\n- `penalty = l2`\n\n- `alpha = 0.0001`\n\n- `l1_ratio = 0.15`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.001`\n\n- `shuffle = true`\n\n- `verbose = 0`\n\n- `epsilon = 0.1`\n\n- `n_jobs = nothing`\n\n- `random_state = nothing`\n\n- `learning_rate = optimal`\n\n- `eta0 = 0.0`\n\n- `power_t = 0.5`\n\n- `early_stopping = false`\n\n- `validation_fraction = 0.1`\n\n- `n_iter_no_change = 5`\n\n- `class_weight = nothing`\n\n- `warm_start = false`\n\n- `average = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "SGDClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nSGDClassifier\n```\n\nA model type for constructing a sgd classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSGDClassifier = @load SGDClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = SGDClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`SGDClassifier(loss=...)`.\n# Hyper-parameters\n\n- `loss = hinge`\n\n- `penalty = l2`\n\n- `alpha = 0.0001`\n\n- `l1_ratio = 0.15`\n\n- `fit_intercept = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.001`\n\n- `shuffle = true`\n\n- `verbose = 0`\n\n- `epsilon = 0.1`\n\n- `n_jobs = nothing`\n\n- `random_state = nothing`\n\n- `learning_rate = optimal`\n\n- `eta0 = 0.0`\n\n- `power_t = 0.5`\n\n- `early_stopping = false`\n\n- `validation_fraction = 0.1`\n\n- `n_iter_no_change = 5`\n\n- `class_weight = nothing`\n\n- `warm_start = false`\n\n- `average = false`\n\n""" -":name" = "SGDClassifier" -":human_name" = "sgd classifier" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:loss, :penalty, :alpha, :l1_ratio, :fit_intercept, :max_iter, :tol, :shuffle, :verbose, :epsilon, :n_jobs, :random_state, :learning_rate, :eta0, :power_t, :early_stopping, :validation_fraction, :n_iter_no_change, :class_weight, :warm_start, :average)`" -":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.TheilSenRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Float64\", \"Any\", \"Union{Nothing, Int64}\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" 
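By contrast, the TheilSenRegressor entry taking shape here is `Deterministic` with a `predict_scitype` of `AbstractVector{Continuous}`, so `predict` yields point values directly. A minimal sketch under the same assumptions as above (synthetic data, illustrative hyper-parameter):

```julia
using MLJ  # assumes MLJ and MLJScikitLearnInterface are installed

TheilSenRegressor = @load TheilSenRegressor pkg=MLJScikitLearnInterface
model = TheilSenRegressor(max_iter=500)  # override one default hyper-parameter

A = randn(200, 3)
X = MLJ.table(A)                         # table of Continuous features
y = 2 .* A[:, 1] .+ 0.1 .* randn(200)    # synthetic Continuous target

mach = machine(model, X, y) |> fit!
yhat = predict(mach, X)                  # point predictions, no densities involved
```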
":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.TheilSenRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nTheilSenRegressor\n```\n\nA model type for constructing a Theil-Sen regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nTheilSenRegressor = @load TheilSenRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = TheilSenRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`TheilSenRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `max_subpopulation = 10000`\n\n- `n_subsamples = nothing`\n\n- `max_iter = 300`\n\n- `tol = 0.001`\n\n- `random_state = nothing`\n\n- `n_jobs = nothing`\n\n- `verbose = false`\n\n""" -":name" = "TheilSenRegressor" +":hyperparameters" = "`(:fit_intercept, :copy_X, :max_subpopulation, :n_subsamples, :max_iter, :tol, :random_state, :n_jobs, :verbose)`" +":is_pure_julia" = "`false`" ":human_name" = "Theil-Sen regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:fit_intercept, :copy_X, :max_subpopulation, :n_subsamples, :max_iter, :tol, :random_state, :n_jobs, :verbose)`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Float64\", \"Any\", \"Union{Nothing, Int64}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nTheilSenRegressor\n```\n\nA model type for constructing a Theil-Sen regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nTheilSenRegressor = @load TheilSenRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = TheilSenRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`TheilSenRegressor(fit_intercept=...)`.\n# Hyper-parameters\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `max_subpopulation = 10000`\n\n- `n_subsamples = nothing`\n\n- `max_iter = 300`\n\n- `tol = 0.001`\n\n- `random_state = nothing`\n\n- `n_jobs = nothing`\n\n- `verbose = false`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "TheilSenRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.SpectralClustering] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, String}\", \"Any\", \"Int64\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "BSD" +":prediction_type" = ":unknown" ":load_path" = "MLJScikitLearnInterface.SpectralClustering" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:n_clusters, :eigen_solver, :random_state, :n_init, :gamma, :affinity, :n_neighbors, :eigen_tol, :assign_labels, :n_jobs)`" +":is_pure_julia" = "`false`" +":human_name" = "spectral clustering" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSpectralClustering\n```\n\nA model type for constructing a spectral clustering, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSpectralClustering = @load SpectralClustering pkg=MLJScikitLearnInterface\n```\n\nDo `model = SpectralClustering()` to construct an instance with default hyper-parameters. 
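A similar sketch for the `TheilSenRegressor` entry just updated, under the same environment assumptions; `make_regression` is MLJ's synthetic-data helper, and `max_iter=300` is the default listed in the docstring.

```julia
using MLJ

TheilSenRegressor = @load TheilSenRegressor pkg=MLJScikitLearnInterface
X, y = make_regression(100, 3)                 # Continuous feature table + Continuous target
mach = machine(TheilSenRegressor(max_iter=300), X, y)
fit!(mach)
yhat = predict(mach, X)                        # AbstractVector{Continuous}
fi = feature_importances(mach)                 # the entry sets ":reports_feature_importances" = "true"
```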
Provide keyword arguments to override hyper-parameter defaults, as in `SpectralClustering(n_clusters=...)`.\n\nApply clustering to a projection of the normalized Laplacian. In practice spectral clustering is very useful when the structure of the individual clusters is highly non-convex or more generally when a measure of the center and spread of the cluster is not a suitable description of the complete cluster. For instance when clusters are nested circles on the 2D plane.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "SpectralClustering" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nSpectralClustering\n```\n\nA model type for constructing a spectral clustering, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSpectralClustering = @load SpectralClustering pkg=MLJScikitLearnInterface\n```\n\nDo `model = SpectralClustering()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SpectralClustering(n_clusters=...)`.\n\nApply clustering to a projection of the normalized Laplacian. In practice spectral clustering is very useful when the structure of the individual clusters is highly non-convex or more generally when a measure of the center and spread of the cluster is not a suitable description of the complete cluster. 
For instance when clusters are nested circles on the 2D plane.\n""" -":name" = "SpectralClustering" -":human_name" = "spectral clustering" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params"] -":hyperparameters" = "`(:n_clusters, :eigen_solver, :random_state, :n_init, :gamma, :affinity, :n_neighbors, :eigen_tol, :assign_labels, :n_jobs)`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, String}\", \"Any\", \"Int64\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.Birch] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Multiclass}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "BSD" +":prediction_type" = ":unknown" ":load_path" = "MLJScikitLearnInterface.Birch" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:threshold, :branching_factor, :n_clusters, :compute_labels, :copy)`" +":is_pure_julia" = "`false`" +":human_name" = "birch" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nBirch\n```\n\nA model type for constructing a birch, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBirch = @load Birch pkg=MLJScikitLearnInterface\n```\n\nDo `model = Birch()` to construct an instance with default hyper-parameters. 
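The `SpectralClustering` entry above implements no `predict` or `transform` (its `:implemented_methods` stops at `:fitted_params`), so results are read off the fitted machine. A sketch, with MLJ's `make_blobs` as a stand-in dataset:

```julia
using MLJ

SpectralClustering = @load SpectralClustering pkg=MLJScikitLearnInterface
X, _ = make_blobs(200, 2; centers=3)           # synthetic two-feature clustering data
mach = machine(SpectralClustering(n_clusters=3), X)
fit!(mach)
fitted_params(mach)  # cluster assignments live here; exact field names are not shown in this entry
```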
Provide keyword arguments to override hyper-parameter defaults, as in `Birch(threshold=...)`.\n\nMemory-efficient, online-learning algorithm provided as an alternative to MiniBatchKMeans. Note: noisy samples are given the label -1.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "Birch" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nBirch\n```\n\nA model type for constructing a birch, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBirch = @load Birch pkg=MLJScikitLearnInterface\n```\n\nDo `model = Birch()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Birch(threshold=...)`.\n\nMemory-efficient, online-learning algorithm provided as an alternative to MiniBatchKMeans. Note: noisy samples are given the label -1.\n""" -":name" = "Birch" -":human_name" = "birch" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:threshold, :branching_factor, :n_clusters, :compute_labels, :copy)`" -":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Multiclass}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.AgglomerativeClustering] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"String\", \"Any\", \"Any\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "BSD" +":prediction_type" = ":unknown" ":load_path" = "MLJScikitLearnInterface.AgglomerativeClustering" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:n_clusters, :metric, :memory, :connectivity, :compute_full_tree, :linkage, :distance_threshold)`" +":is_pure_julia" = "`false`" +":human_name" = "agglomerative clustering" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nAgglomerativeClustering\n```\n\nA model type for constructing a agglomerative clustering, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nAgglomerativeClustering = @load AgglomerativeClustering pkg=MLJScikitLearnInterface\n```\n\nDo `model = AgglomerativeClustering()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `AgglomerativeClustering(n_clusters=...)`.\n\nRecursively merges the pair of clusters that minimally increases a given linkage distance. Note: there is no `predict` or `transform`. Instead, inspect the `fitted_params`.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "AgglomerativeClustering" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nAgglomerativeClustering\n```\n\nA model type for constructing a agglomerative clustering, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nAgglomerativeClustering = @load AgglomerativeClustering pkg=MLJScikitLearnInterface\n```\n\nDo `model = AgglomerativeClustering()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `AgglomerativeClustering(n_clusters=...)`.\n\nRecursively merges the pair of clusters that minimally increases a given linkage distance. Note: there is no `predict` or `transform`. 
Instead, inspect the `fitted_params`.\n""" -":name" = "AgglomerativeClustering" -":human_name" = "agglomerative clustering" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params"] -":hyperparameters" = "`(:n_clusters, :metric, :memory, :connectivity, :compute_full_tree, :linkage, :distance_threshold)`" -":hyperparameter_types" = "`(\"Int64\", \"String\", \"Any\", \"Any\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.ElasticNetRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Bool\", \"Union{Bool, AbstractMatrix}\", \"Int64\", \"Bool\", \"Float64\", \"Bool\", \"Bool\", \"Any\", \"String\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.ElasticNetRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nElasticNetRegressor\n```\n\nA model type for constructing a elastic net regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nElasticNetRegressor = @load ElasticNetRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = ElasticNetRegressor()` to construct an instance with default hyper-parameters. 
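For the `AgglomerativeClustering` entry, whose docstring warns there is no `predict` or `transform`, a sketch follows; `linkage="ward"` mirrors the scikit-learn default and is an assumption here, not a value read from this hunk.

```julia
using MLJ

AgglomerativeClustering = @load AgglomerativeClustering pkg=MLJScikitLearnInterface
X, _ = make_blobs(150, 2; centers=2)
mach = machine(AgglomerativeClustering(n_clusters=2, linkage="ward"), X)
fit!(mach)
fitted_params(mach)  # as the docstring says: inspect the fitted params instead
```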
Provide keyword arguments to override hyper-parameter defaults, as in\n`ElasticNetRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `l1_ratio = 0.5`\n\n- `fit_intercept = true`\n\n- `precompute = false`\n\n- `max_iter = 1000`\n\n- `copy_X = true`\n\n- `tol = 0.0001`\n\n- `warm_start = false`\n\n- `positive = false`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" -":name" = "ElasticNetRegressor" +":hyperparameters" = "`(:alpha, :l1_ratio, :fit_intercept, :precompute, :max_iter, :copy_X, :tol, :warm_start, :positive, :random_state, :selection)`" +":is_pure_julia" = "`false`" ":human_name" = "elastic net regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:alpha, :l1_ratio, :fit_intercept, :precompute, :max_iter, :copy_X, :tol, :warm_start, :positive, :random_state, :selection)`" -":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Bool\", \"Union{Bool, AbstractMatrix}\", \"Int64\", \"Bool\", \"Float64\", \"Bool\", \"Bool\", \"Any\", \"String\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nElasticNetRegressor\n```\n\nA model type for constructing an elastic net regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nElasticNetRegressor = @load ElasticNetRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = ElasticNetRegressor()` to construct an instance with default hyper-parameters. 
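A sketch for the `ElasticNetRegressor` entry being updated here; the keyword values are the defaults listed in its docstring.

```julia
using MLJ

ElasticNetRegressor = @load ElasticNetRegressor pkg=MLJScikitLearnInterface
X, y = make_regression(100, 5)
mach = machine(ElasticNetRegressor(alpha=1.0, l1_ratio=0.5), X, y)
fit!(mach)
yhat = predict(mach, X)  # deterministic Continuous predictions
```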
Provide keyword arguments to override hyper-parameter defaults, as in\n`ElasticNetRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `l1_ratio = 0.5`\n\n- `fit_intercept = true`\n\n- `precompute = false`\n\n- `max_iter = 1000`\n\n- `copy_X = true`\n\n- `tol = 0.0001`\n\n- `warm_start = false`\n\n- `positive = false`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "ElasticNetRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.RandomForestClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.RandomForestClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nRandomForestClassifier\n```\n\nA model type for 
constructing a random forest classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomForestClassifier = @load RandomForestClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = RandomForestClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomForestClassifier(n_estimators=...)`.\n\nA random forest is a meta estimator that fits a number of classifying decision trees on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting. The sub-sample size is controlled with the `max_samples` parameter if `bootstrap=True` (default), otherwise the whole dataset is used to build each tree.\n""" -":name" = "RandomForestClassifier" +":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :class_weight, :ccp_alpha, :max_samples, :monotonic_cst)`" +":is_pure_julia" = "`false`" ":human_name" = "random forest classifier" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :class_weight, :ccp_alpha, :max_samples, :monotonic_cst)`" -":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nRandomForestClassifier\n```\n\nA model type for constructing a random forest classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomForestClassifier = @load RandomForestClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = RandomForestClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomForestClassifier(n_estimators=...)`.\n\nA random forest is a meta estimator that fits a number of classifying decision trees on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting. 
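The `RandomForestClassifier` entry here is probabilistic, so `predict` returns distributions rather than labels; a sketch under the usual assumptions:

```julia
using MLJ

RandomForestClassifier = @load RandomForestClassifier pkg=MLJScikitLearnInterface
X, y = @load_iris
mach = machine(RandomForestClassifier(n_estimators=100), X, y)
fit!(mach)
probs = predict(mach, X)       # UnivariateFinite distributions (":prediction_type" = ":probabilistic")
yhat  = predict_mode(mach, X)  # collapse to the most probable class
```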
The sub-sample size is controlled with the `max_samples` parameter if `bootstrap=True` (default), otherwise the whole dataset is used to build each tree.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "RandomForestClassifier" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.LogisticCVClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Int64, AbstractVector{Float64}}\", \"Bool\", \"Any\", \"Bool\", \"String\", \"Any\", \"String\", \"Float64\", \"Int64\", \"Any\", \"Union{Nothing, Int64}\", \"Int64\", \"Bool\", \"Float64\", \"String\", \"Any\", \"Union{Nothing, AbstractVector{Float64}}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.LogisticCVClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:Cs, :fit_intercept, :cv, :dual, :penalty, :scoring, :solver, :tol, :max_iter, :class_weight, :n_jobs, :verbose, :refit, :intercept_scaling, :multi_class, :random_state, :l1_ratios)`" +":is_pure_julia" = "`false`" +":human_name" = "logistic regression classifier with built-in cross-validation" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLogisticCVClassifier\n```\n\nA model type for constructing a logistic regression classifier with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel 
interface.\n\nFrom MLJ, the type can be imported using\n```\nLogisticCVClassifier = @load LogisticCVClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = LogisticCVClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LogisticCVClassifier(Cs=...)`.\n# Hyper-parameters\n\n- `Cs = 10`\n\n- `fit_intercept = true`\n\n- `cv = 5`\n\n- `dual = false`\n\n- `penalty = l2`\n\n- `scoring = nothing`\n\n- `solver = lbfgs`\n\n- `tol = 0.0001`\n\n- `max_iter = 100`\n\n- `class_weight = nothing`\n\n- `n_jobs = nothing`\n\n- `verbose = 0`\n\n- `refit = true`\n\n- `intercept_scaling = 1.0`\n\n- `multi_class = auto`\n\n- `random_state = nothing`\n\n- `l1_ratios = nothing`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "LogisticCVClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLogisticCVClassifier\n```\n\nA model type for constructing a logistic regression classifier with built-in cross-validation, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLogisticCVClassifier = @load LogisticCVClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = LogisticCVClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LogisticCVClassifier(Cs=...)`.\n# Hyper-parameters\n\n- `Cs = 10`\n\n- `fit_intercept = true`\n\n- `cv = 5`\n\n- `dual = false`\n\n- `penalty = l2`\n\n- `scoring = nothing`\n\n- `solver = lbfgs`\n\n- `tol = 0.0001`\n\n- `max_iter = 100`\n\n- `class_weight = nothing`\n\n- `n_jobs = nothing`\n\n- `verbose = 0`\n\n- `refit = true`\n\n- `intercept_scaling = 1.0`\n\n- `multi_class = auto`\n\n- `random_state = nothing`\n\n- `l1_ratios = nothing`\n\n""" -":name" = "LogisticCVClassifier" -":human_name" = "logistic regression classifier with built-in cross-validation" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:Cs, :fit_intercept, :cv, :dual, :penalty, :scoring, :solver, :tol, :max_iter, :class_weight, :n_jobs, :verbose, :refit, :intercept_scaling, :multi_class, :random_state, :l1_ratios)`" -":hyperparameter_types" = "`(\"Union{Int64, AbstractVector{Float64}}\", \"Bool\", \"Any\", \"Bool\", \"String\", \"Any\", \"String\", \"Float64\", \"Int64\", \"Any\", \"Union{Nothing, Int64}\", \"Int64\", \"Bool\", \"Float64\", \"String\", \"Any\", \"Union{Nothing, AbstractVector{Float64}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = 
"`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.MultiTaskElasticNetRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Union{Float64, Vector{Float64}}\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"String\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.MultiTaskElasticNetRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nMultiTaskElasticNetRegressor\n```\n\nA model type for constructing a multi-target elastic net regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultiTaskElasticNetRegressor = @load MultiTaskElasticNetRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = MultiTaskElasticNetRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`MultiTaskElasticNetRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `l1_ratio = 0.5`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `warm_start = false`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" -":name" = "MultiTaskElasticNetRegressor" +":hyperparameters" = "`(:alpha, :l1_ratio, :fit_intercept, :copy_X, :max_iter, :tol, :warm_start, :random_state, :selection)`" +":is_pure_julia" = "`false`" ":human_name" = "multi-target elastic net regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:alpha, :l1_ratio, :fit_intercept, :copy_X, :max_iter, :tol, :warm_start, :random_state, :selection)`" -":hyperparameter_types" = "`(\"Float64\", \"Union{Float64, Vector{Float64}}\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"String\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultiTaskElasticNetRegressor\n```\n\nA model type for constructing a multi-target elastic net regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultiTaskElasticNetRegressor = @load MultiTaskElasticNetRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = MultiTaskElasticNetRegressor()` to construct an instance with default hyper-parameters. 
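The `MultiTaskElasticNetRegressor` entry being updated here is unusual in that its target is itself a table (its `:target_scitype` is a `Table` of `Continuous` columns). A sketch with hypothetical column names:

```julia
using MLJ

MultiTaskElasticNetRegressor = @load MultiTaskElasticNetRegressor pkg=MLJScikitLearnInterface
X = (x1 = rand(100), x2 = rand(100))  # any Tables.jl table of Continuous features
y = (t1 = rand(100), t2 = rand(100))  # multi-target: the target is a table too
mach = machine(MultiTaskElasticNetRegressor(alpha=1.0), X, y)
fit!(mach)
yhat = predict(mach, X)               # a table with one column per target
```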
Provide keyword arguments to override hyper-parameter defaults, as in\n`MultiTaskElasticNetRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `l1_ratio = 0.5`\n\n- `fit_intercept = true`\n\n- `copy_X = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `warm_start = false`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "MultiTaskElasticNetRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.ExtraTreesRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.ExtraTreesRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nExtraTreesRegressor\n```\n\nA model type for constructing a extra trees regressor, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and 
implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nExtraTreesRegressor = @load ExtraTreesRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = ExtraTreesRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ExtraTreesRegressor(n_estimators=...)`.\n\nExtra trees regressor, fits a number of randomized decision trees on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting.\n""" -":name" = "ExtraTreesRegressor" +":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :ccp_alpha, :max_samples, :monotonic_cst)`" +":is_pure_julia" = "`false`" ":human_name" = "extra trees regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:n_estimators, :criterion, :max_depth, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_features, :max_leaf_nodes, :min_impurity_decrease, :bootstrap, :oob_score, :n_jobs, :random_state, :verbose, :warm_start, :ccp_alpha, :max_samples, :monotonic_cst)`" -":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nExtraTreesRegressor\n```\n\nA model type for constructing an extra trees regressor, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nExtraTreesRegressor = @load ExtraTreesRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = ExtraTreesRegressor()` to construct an instance with default hyper-parameters. 
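A sketch for the `ExtraTreesRegressor` entry being updated here, following the same pattern as the other regressors:

```julia
using MLJ

ExtraTreesRegressor = @load ExtraTreesRegressor pkg=MLJScikitLearnInterface
X, y = make_regression(200, 4)
mach = machine(ExtraTreesRegressor(n_estimators=100), X, y)
fit!(mach)
yhat = predict(mach, X)
```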
Provide keyword arguments to override hyper-parameter defaults, as in `ExtraTreesRegressor(n_estimators=...)`.\n\nExtra trees regressor, fits a number of randomized decision trees on various sub-samples of the dataset and uses averaging to improve the predictive accuracy and control over-fitting.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "ExtraTreesRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.LassoRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Bool, AbstractMatrix}\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Any\", \"String\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.LassoRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nLassoRegressor\n```\n\nA model type for constructing a lasso regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoRegressor = @load LassoRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoRegressor()` to construct an instance with default hyper-parameters. 
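For the `LassoRegressor` entry above, a sketch that adds a quick resampled performance estimate; the `alpha` value is arbitrary, chosen only for illustration.

```julia
using MLJ

LassoRegressor = @load LassoRegressor pkg=MLJScikitLearnInterface
X, y = make_regression(100, 5)
mach = machine(LassoRegressor(alpha=0.1), X, y)
evaluate!(mach, resampling=CV(nfolds=5), measure=rms)  # fits and scores across folds
```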
Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `fit_intercept = true`\n\n- `precompute = false`\n\n- `copy_X = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `warm_start = false`\n\n- `positive = false`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" -":name" = "LassoRegressor" +":hyperparameters" = "`(:alpha, :fit_intercept, :precompute, :copy_X, :max_iter, :tol, :warm_start, :positive, :random_state, :selection)`" +":is_pure_julia" = "`false`" ":human_name" = "lasso regressor" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"] -":hyperparameters" = "`(:alpha, :fit_intercept, :precompute, :copy_X, :max_iter, :tol, :warm_start, :positive, :random_state, :selection)`" -":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Bool, AbstractMatrix}\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Any\", \"String\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nLassoRegressor\n```\n\nA model type for constructing a lasso regressor, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nLassoRegressor = @load LassoRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = LassoRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`LassoRegressor(alpha=...)`.\n# Hyper-parameters\n\n- `alpha = 1.0`\n\n- `fit_intercept = true`\n\n- `precompute = false`\n\n- `copy_X = true`\n\n- `max_iter = 1000`\n\n- `tol = 0.0001`\n\n- `warm_start = false`\n\n- `positive = false`\n\n- `random_state = nothing`\n\n- `selection = cyclic`\n\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" +":package_name" = "MLJScikitLearnInterface" +":name" = "LassoRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.MultinomialNBClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Nothing, AbstractVector}\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, 
nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "BSD" +":prediction_type" = ":probabilistic" ":load_path" = "MLJScikitLearnInterface.MultinomialNBClassifier" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameters" = "`(:alpha, :fit_prior, :class_prior)`" +":is_pure_julia" = "`false`" +":human_name" = "multinomial naive Bayes classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultinomialNBClassifier\n```\n\nA model type for constructing a multinomial naive Bayes classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultinomialNBClassifier = @load MultinomialNBClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = MultinomialNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultinomialNBClassifier(alpha=...)`.\n\nMultinomial naive Bayes classifier. It is suitable for classification with discrete features (e.g. word counts for text classification).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJScikitLearnInterface" +":name" = "MultinomialNBClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMultinomialNBClassifier\n```\n\nA model type for constructing a multinomial naive Bayes classifier, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultinomialNBClassifier = @load MultinomialNBClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = MultinomialNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultinomialNBClassifier(alpha=...)`.\n\nMultinomial naive bayes classifier. It is suitable for classification with discrete features (e.g. 
word counts for text classification).\n""" -":name" = "MultinomialNBClassifier" -":human_name" = "multinomial naive Bayes classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:alpha, :fit_prior, :class_prior)`" -":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Nothing, AbstractVector}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.GradientBoostingRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\", \"Union{Nothing, Float64, Int64, String}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\")`" +":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJScikitLearnInterface" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "BSD" +":prediction_type" = ":deterministic" ":load_path" = "MLJScikitLearnInterface.GradientBoostingRegressor" -":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" -":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nGradientBoostingRegressor\n```\n\nA model type for constructing a gradient boosting ensemble regression, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nGradientBoostingRegressor = @load GradientBoostingRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = 
 
 [MLJScikitLearnInterface.GradientBoostingRegressor]
-":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`ScientificTypesBase.Unknown`"
-":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\", \"Union{Nothing, Float64, Int64, String}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\")`"
+":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`"
-":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
-":transform_scitype" = "`ScientificTypesBase.Unknown`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`true`"
-":is_pure_julia" = "`false`"
-":package_name" = "MLJScikitLearnInterface"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.Deterministic`"
 ":package_license" = "BSD"
+":prediction_type" = ":deterministic"
 ":load_path" = "MLJScikitLearnInterface.GradientBoostingRegressor"
-":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324"
-":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
-":supports_class_weights" = "`false`"
-":supports_online" = "`false`"
-":docstring" = """```\nGradientBoostingRegressor\n```\n\nA model type for constructing a gradient boosting ensemble regression, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nGradientBoostingRegressor = @load GradientBoostingRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = GradientBoostingRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `GradientBoostingRegressor(loss=...)`.\n\nThis estimator builds an additive model in a forward stage-wise fashion; it allows for the optimization of arbitrary differentiable loss functions. In each stage a regression tree is fit on the negative gradient of the given loss function.\n\n[`HistGradientBoostingRegressor`](@ref) is a much faster variant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n"""
-":name" = "GradientBoostingRegressor"
+":hyperparameters" = "`(:loss, :learning_rate, :n_estimators, :subsample, :criterion, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_depth, :min_impurity_decrease, :init, :random_state, :max_features, :alpha, :verbose, :max_leaf_nodes, :warm_start, :validation_fraction, :n_iter_no_change, :tol)`"
+":is_pure_julia" = "`false`"
 ":human_name" = "gradient boosting ensemble regression"
 ":is_supervised" = "`true`"
-":prediction_type" = ":deterministic"
-":abstract_type" = "`MLJModelInterface.Deterministic`"
-":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":feature_importances"]
-":hyperparameters" = "`(:loss, :learning_rate, :n_estimators, :subsample, :criterion, :min_samples_split, :min_samples_leaf, :min_weight_fraction_leaf, :max_depth, :min_impurity_decrease, :init, :random_state, :max_features, :alpha, :verbose, :max_leaf_nodes, :warm_start, :validation_fraction, :n_iter_no_change, :tol)`"
-":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\", \"Union{Nothing, Float64, Int64, String}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
 ":iteration_parameter" = "`nothing`"
+":docstring" = """```\nGradientBoostingRegressor\n```\n\nA model type for constructing a gradient boosting ensemble regression, based on [MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nGradientBoostingRegressor = @load GradientBoostingRegressor pkg=MLJScikitLearnInterface\n```\n\nDo `model = GradientBoostingRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `GradientBoostingRegressor(loss=...)`.\n\nThis estimator builds an additive model in a forward stage-wise fashion; it allows for the optimization of arbitrary differentiable loss functions. In each stage a regression tree is fit on the negative gradient of the given loss function.\n\n[`HistGradientBoostingRegressor`](@ref) is a much faster variant of this algorithm for intermediate datasets (`n_samples >= 10_000`).\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl"
+":package_name" = "MLJScikitLearnInterface"
+":name" = "GradientBoostingRegressor"
+":target_in_fit" = "`true`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
+":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"]
+":deep_properties" = "`()`"
+":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
+":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`true`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-":constructor" = "`nothing`"
+":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_wrapper" = "`false`"
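As with the previous entry, a hedged usage sketch may help orient readers; `make_regression` is MLJ's built-in synthetic-data helper, and the hyper-parameter values are illustrative only:

```julia
using MLJ

GradientBoostingRegressor = @load GradientBoostingRegressor pkg=MLJScikitLearnInterface

X, y = make_regression(100, 3)  # synthetic continuous features and target

mach = machine(GradientBoostingRegressor(n_estimators = 200, learning_rate = 0.05), X, y) |> fit!

yhat = predict(mach, X)    # deterministic predictions
feature_importances(mach)  # supported here, since reports_feature_importances is true
```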
 
 [MLJScikitLearnInterface.SVMClassifier]
-":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`ScientificTypesBase.Unknown`"
-":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Any\")`"
+":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`"
-":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
-":transform_scitype" = "`ScientificTypesBase.Unknown`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`true`"
-":is_pure_julia" = "`false`"
-":package_name" = "MLJScikitLearnInterface"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.Deterministic`"
 ":package_license" = "BSD"
+":prediction_type" = ":deterministic"
 ":load_path" = "MLJScikitLearnInterface.SVMClassifier"
+":hyperparameters" = "`(:C, :kernel, :degree, :gamma, :coef0, :shrinking, :tol, :cache_size, :max_iter, :decision_function_shape, :random_state)`"
+":is_pure_julia" = "`false`"
+":human_name" = "C-support vector classifier"
+":is_supervised" = "`true`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nSVMClassifier\n```\n\nA model type for constructing a C-support vector classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMClassifier = @load SVMClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMClassifier(C=...)`.\n# Hyper-parameters\n\n- `C = 1.0`\n\n- `kernel = "rbf"`\n\n- `degree = 3`\n\n- `gamma = "scale"`\n\n- `coef0 = 0.0`\n\n- `shrinking = true`\n\n- `tol = 0.001`\n\n- `cache_size = 200`\n\n- `max_iter = -1`\n\n- `decision_function_shape = "ovr"`\n\n- `random_state = nothing`\n\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/JuliaAI/MLJScikitLearnInterface.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "MLJScikitLearnInterface"
+":name" = "SVMClassifier"
+":target_in_fit" = "`true`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nSVMClassifier\n```\n\nA model type for constructing a C-support vector classifier, based on\n[MLJScikitLearnInterface.jl](https://github.com/JuliaAI/MLJScikitLearnInterface.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSVMClassifier = @load SVMClassifier pkg=MLJScikitLearnInterface\n```\n\nDo `model = SVMClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in\n`SVMClassifier(C=...)`.\n# Hyper-parameters\n\n- `C = 1.0`\n\n- `kernel = rbf`\n\n- `degree = 3`\n\n- `gamma = scale`\n\n- `coef0 = 0.0`\n\n- `shrinking = true`\n\n- `tol = 0.001`\n\n- `cache_size = 200`\n\n- `max_iter = -1`\n\n- `decision_function_shape = ovr`\n\n- `random_state = nothing`\n\n"""
-":name" = "SVMClassifier"
-":human_name" = "C-support vector classifier"
-":is_supervised" = "`true`"
-":prediction_type" = ":deterministic"
-":abstract_type" = "`MLJModelInterface.Deterministic`"
 ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"]
-":hyperparameters" = "`(:C, :kernel, :degree, :gamma, :coef0, :shrinking, :tol, :cache_size, :max_iter, :decision_function_shape, :random_state)`"
-":hyperparameter_types" = "`(\"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Any\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
+":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-":constructor" = "`nothing`"
+":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":is_wrapper" = "`false`"
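A matching sketch for the classifier case; `make_blobs` is MLJ's synthetic-data helper, and the chosen hyper-parameter values are illustrative assumptions, not defaults being documented:

```julia
using MLJ

SVMClassifier = @load SVMClassifier pkg=MLJScikitLearnInterface

X, y = make_blobs(150, 2; centers = 2)  # synthetic two-class data, continuous features

mach = machine(SVMClassifier(C = 1.0, kernel = "rbf"), X, y) |> fit!

predict(mach, X)  # deterministic class labels (prediction_type is :deterministic)
```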
 
 [OutlierDetectionNeighbors.ABODDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":is_wrapper" = "`false`"
+":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Bool\")`"
+":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`true`"
-":package_name" = "OutlierDetectionNeighbors"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionNeighbors.ABODDetector"
-":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb"
+":hyperparameters" = "`(:k, :metric, :algorithm, :static, :leafsize, :reorder, :parallel, :enhanced)`"
+":is_pure_julia" = "`true`"
+":human_name" = "abod detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nABODDetector(k = 5,\n              metric = Euclidean(),\n              algorithm = :kdtree,\n              static = :auto,\n              leafsize = 10,\n              reorder = true,\n              parallel = false,\n              enhanced = false)\n```\n\nDetermine outliers based on the angles to its nearest neighbors. This implements the `FastABOD` variant described in the paper, that is, it uses the variance of angles to its nearest neighbors, not to the whole dataset, see [1]. \n\n*Notice:* The scores are inverted, to conform to our notion that higher scores describe higher outlierness.\n\n## Parameters\n\n```\nk::Integer\n```\n\nNumber of neighbors (must be greater than 0).\n\n```\nmetric::Metric\n```\n\nThis is one of the Metric types defined in the Distances.jl package. It is possible to define your own metrics by creating new types that are subtypes of Metric.\n\n```\nalgorithm::Symbol\n```\n\nOne of `(:kdtree, :balltree)`. In a `kdtree`, points are recursively split into groups using hyper-planes. Therefore a KDTree only works with axis aligned metrics which are: Euclidean, Chebyshev, Minkowski and Cityblock. A *brutetree* linearly searches all points in a brute force fashion and works with any Metric. A *balltree* recursively splits points into groups bounded by hyper-spheres and works with any Metric.\n\n```\nstatic::Union{Bool, Symbol}\n```\n\nOne of `(true, false, :auto)`. Whether the input data for fitting and transform should be statically or dynamically allocated. If `true`, the data is statically allocated. If `false`, the data is dynamically allocated. If `:auto`, the data is dynamically allocated if the product of all dimensions except the last is greater than 100.\n\n```\nleafsize::Int\n```\n\nDetermines at what number of points to stop splitting the tree further. There is a trade-off between traversing the tree and having to evaluate the metric function for increasing number of points.\n\n```\nreorder::Bool\n```\n\nWhile building the tree this will put points close in distance close in memory since this helps with cache locality. In this case, a copy of the original data will be made so that the original data is left unmodified. This can have a significant impact on performance and is by default set to true.\n\n```\nparallel::Bool\n```\n\nParallelize `score` and `predict` using all threads available. The number of threads can be set with the `JULIA_NUM_THREADS` environment variable. Note: `fit` is not parallel.\n\n```\nenhanced::Bool\n```\n\nWhen `enhanced=true`, it uses the enhanced ABOD (EABOD) adaptation proposed by [2].\n\n## Examples\n\n```julia\nusing OutlierDetection: ABODDetector, fit, transform\ndetector = ABODDetector()\nX = rand(10, 100)\nmodel, result = fit(detector, X; verbosity=0)\ntest_scores = transform(detector, model, X)\n```\n\n## References\n\n[1] Kriegel, Hans-Peter; Schubert, Matthias; Zimek, Arthur (2008): Angle-based outlier detection in high-dimensional data.\n\n[2] Li, Xiaojie; Lv, Jian Cheng; Cheng, Dongdong (2015): Angle-Based Outlier Detection Algorithm with More Stable Relationships.\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionNeighbors.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionNeighbors"
+":name" = "ABODDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nABODDetector(k = 5,\n              metric = Euclidean(),\n              algorithm = :kdtree,\n              static = :auto,\n              leafsize = 10,\n              reorder = true,\n              parallel = false,\n              enhanced = false)\n```\n\nDetermine outliers based on the angles to its nearest neighbors. This implements the `FastABOD` variant described in the paper, that is, it uses the variance of angles to its nearest neighbors, not to the whole dataset, see [1]. \n\n*Notice:* The scores are inverted, to conform to our notion that higher scores describe higher outlierness.\n\n## Parameters\n\n```\nk::Integer\n```\n\nNumber of neighbors (must be greater than 0).\n\n```\nmetric::Metric\n```\n\nThis is one of the Metric types defined in the Distances.jl package. It is possible to define your own metrics by creating new types that are subtypes of Metric.\n\n```\nalgorithm::Symbol\n```\n\nOne of `(:kdtree, :balltree)`. In a `kdtree`, points are recursively split into groups using hyper-planes. Therefore a KDTree only works with axis aligned metrics which are: Euclidean, Chebyshev, Minkowski and Cityblock. A *brutetree* linearly searches all points in a brute force fashion and works with any Metric. A *balltree* recursively splits points into groups bounded by hyper-spheres and works with any Metric.\n\n```\nstatic::Union{Bool, Symbol}\n```\n\nOne of `(true, false, :auto)`. Whether the input data for fitting and transform should be statically or dynamically allocated. If `true`, the data is statically allocated. If `false`, the data is dynamically allocated. If `:auto`, the data is dynamically allocated if the product of all dimensions except the last is greater than 100.\n\n```\nleafsize::Int\n```\n\nDetermines at what number of points to stop splitting the tree further. There is a trade-off between traversing the tree and having to evaluate the metric function for increasing number of points.\n\n```\nreorder::Bool\n```\n\nWhile building the tree this will put points close in distance close in memory since this helps with cache locality. In this case, a copy of the original data will be made so that the original data is left unmodified. This can have a significant impact on performance and is by default set to true.\n\n```\nparallel::Bool\n```\n\nParallelize `score` and `predict` using all threads available. The number of threads can be set with the `JULIA_NUM_THREADS` environment variable. Note: `fit` is not parallel.\n\n```\nenhanced::Bool\n```\n\nWhen `enhanced=true`, it uses the enhanced ABOD (EABOD) adaptation proposed by [2].\n\n## Examples\n\n```julia\nusing OutlierDetection: ABODDetector, fit, transform\ndetector = ABODDetector()\nX = rand(10, 100)\nmodel, result = fit(detector, X; verbosity=0)\ntest_scores = transform(detector, model, X)\n```\n\n## References\n\n[1] Kriegel, Hans-Peter; S hubert, Matthias; Zimek, Arthur (2008): Angle-based outlier detection in high-dimensional data.\n\n[2] Li, Xiaojie; Lv, Jian Cheng; Cheng, Dongdong (2015): Angle-Based Outlier Detection Algorithm with More Stable Relationships.\n"""
-":name" = "ABODDetector"
-":human_name" = "abod detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
-":hyperparameters" = "`(:k, :metric, :algorithm, :static, :leafsize, :reorder, :parallel, :enhanced)`"
-":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Bool\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
 ":constructor" = "`nothing`"
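The docstring's example drives the raw OutlierDetection.jl API; since this entry registers the MLJ interface, a hedged sketch of the equivalent machine-based workflow may help (the same pattern applies to the other detectors in this file; the data and `k` value are illustrative):

```julia
using MLJ

ABODDetector = @load ABODDetector pkg=OutlierDetectionNeighbors

X = MLJ.table(rand(100, 3))  # continuous features, per the declared input_scitype

mach = machine(ABODDetector(k = 10), X) |> fit!

# Per the declared transform_scitype, transform returns a tuple of
# (training scores, test scores).
train_scores, test_scores = transform(mach, X)
```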
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "OutlierDetectionNeighbors" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionNeighbors.DNNDetector" -":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" +":hyperparameters" = "`(:metric, :algorithm, :static, :leafsize, :reorder, :parallel, :d)`" +":is_pure_julia" = "`true`" +":human_name" = "dnn detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nDNNDetector(d = 0,\n metric = Euclidean(),\n algorithm = :kdtree,\n leafsize = 10,\n reorder = true,\n parallel = false)\n```\n\nAnomaly score based on the number of neighbors in a hypersphere of radius `d`. Knorr et al. [1] directly converted the resulting outlier scores to labels, thus this implementation does not fully reflect the approach from the paper.\n\n## Parameters\n\n```\nd::Real\n```\n\nThe hypersphere radius used to calculate the global density of an instance.\n\n```\nmetric::Metric\n```\n\nThis is one of the Metric types defined in the Distances.jl package. It is possible to define your own metrics by creating new types that are subtypes of Metric.\n\n```\nalgorithm::Symbol\n```\n\nOne of `(:kdtree, :balltree)`. In a `kdtree`, points are recursively split into groups using hyper-planes. Therefore a KDTree only works with axis aligned metrics which are: Euclidean, Chebyshev, Minkowski and Cityblock. A *brutetree* linearly searches all points in a brute force fashion and works with any Metric. A *balltree* recursively splits points into groups bounded by hyper-spheres and works with any Metric.\n\n```\nstatic::Union{Bool, Symbol}\n```\n\nOne of `(true, false, :auto)`. Whether the input data for fitting and transform should be statically or dynamically allocated. If `true`, the data is statically allocated. If `false`, the data is dynamically allocated. If `:auto`, the data is dynamically allocated if the product of all dimensions except the last is greater than 100.\n\n```\nleafsize::Int\n```\n\nDetermines at what number of points to stop splitting the tree further. There is a trade-off between traversing the tree and having to evaluate the metric function for increasing number of points.\n\n```\nreorder::Bool\n```\n\nWhile building the tree this will put points close in distance close in memory since this helps with cache locality. In this case, a copy of the original data will be made so that the original data is left unmodified. This can have a significant impact on performance and is by default set to true.\n\n```\nparallel::Bool\n```\n\nParallelize `score` and `predict` using all threads available. The number of threads can be set with the `JULIA_NUM_THREADS` environment variable. Note: `fit` is not parallel.\n\n## Examples\n\n```julia\nusing OutlierDetection: DNNDetector, fit, transform\ndetector = DNNDetector()\nX = rand(10, 100)\nmodel, result = fit(detector, X; verbosity=0)\ntest_scores = transform(detector, model, X)\n```\n\n## References\n\n[1] Knorr, Edwin M.; Ng, Raymond T. 
(1998): Algorithms for Mining Distance-Based Outliers in Large Datasets.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionNeighbors.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OutlierDetectionNeighbors" +":name" = "DNNDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nDNNDetector(d = 0,\n metric = Euclidean(),\n algorithm = :kdtree,\n leafsize = 10,\n reorder = true,\n parallel = false)\n```\n\nAnomaly score based on the number of neighbors in a hypersphere of radius `d`. Knorr et al. [1] directly converted the resulting outlier scores to labels, thus this implementation does not fully reflect the approach from the paper.\n\n## Parameters\n\n```\nd::Real\n```\n\nThe hypersphere radius used to calculate the global density of an instance.\n\n```\nmetric::Metric\n```\n\nThis is one of the Metric types defined in the Distances.jl package. It is possible to define your own metrics by creating new types that are subtypes of Metric.\n\n```\nalgorithm::Symbol\n```\n\nOne of `(:kdtree, :balltree)`. In a `kdtree`, points are recursively split into groups using hyper-planes. Therefore a KDTree only works with axis aligned metrics which are: Euclidean, Chebyshev, Minkowski and Cityblock. A *brutetree* linearly searches all points in a brute force fashion and works with any Metric. A *balltree* recursively splits points into groups bounded by hyper-spheres and works with any Metric.\n\n```\nstatic::Union{Bool, Symbol}\n```\n\nOne of `(true, false, :auto)`. Whether the input data for fitting and transform should be statically or dynamically allocated. If `true`, the data is statically allocated. If `false`, the data is dynamically allocated. If `:auto`, the data is dynamically allocated if the product of all dimensions except the last is greater than 100.\n\n```\nleafsize::Int\n```\n\nDetermines at what number of points to stop splitting the tree further. There is a trade-off between traversing the tree and having to evaluate the metric function for increasing number of points.\n\n```\nreorder::Bool\n```\n\nWhile building the tree this will put points close in distance close in memory since this helps with cache locality. In this case, a copy of the original data will be made so that the original data is left unmodified. This can have a significant impact on performance and is by default set to true.\n\n```\nparallel::Bool\n```\n\nParallelize `score` and `predict` using all threads available. The number of threads can be set with the `JULIA_NUM_THREADS` environment variable. Note: `fit` is not parallel.\n\n## Examples\n\n```julia\nusing OutlierDetection: DNNDetector, fit, transform\ndetector = DNNDetector()\nX = rand(10, 100)\nmodel, result = fit(detector, X; verbosity=0)\ntest_scores = transform(detector, model, X)\n```\n\n## References\n\n[1] Knorr, Edwin M.; Ng, Raymond T. 
(1998): Algorithms for Mining Distance-Based Outliers in Large Datasets.\n""" -":name" = "DNNDetector" -":human_name" = "dnn detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:metric, :algorithm, :static, :leafsize, :reorder, :parallel, :d)`" -":hyperparameter_types" = "`(\"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Real\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" [OutlierDetectionNeighbors.LOFDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\")`" +":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "OutlierDetectionNeighbors" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionNeighbors.LOFDetector" -":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" +":hyperparameters" = "`(:k, :metric, :algorithm, :static, :leafsize, :reorder, :parallel)`" +":is_pure_julia" = "`true`" +":human_name" = "lof detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLOFDetector(k = 5,\n metric = Euclidean(),\n algorithm = :kdtree,\n leafsize = 10,\n reorder = true,\n parallel = 
 
 [OutlierDetectionNeighbors.LOFDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":is_wrapper" = "`false`"
+":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\")`"
+":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`true`"
-":package_name" = "OutlierDetectionNeighbors"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionNeighbors.LOFDetector"
-":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb"
+":hyperparameters" = "`(:k, :metric, :algorithm, :static, :leafsize, :reorder, :parallel)`"
+":is_pure_julia" = "`true`"
+":human_name" = "lof detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nLOFDetector(k = 5,\n            metric = Euclidean(),\n            algorithm = :kdtree,\n            leafsize = 10,\n            reorder = true,\n            parallel = false)\n```\n\nCalculate an anomaly score based on the density of an instance in comparison to its neighbors. This algorithm introduced the notion of local outliers and was developed by Breunig et al., see [1].\n\n## Parameters\n\n```\nk::Integer\n```\n\nNumber of neighbors (must be greater than 0).\n\n```\nmetric::Metric\n```\n\nThis is one of the Metric types defined in the Distances.jl package. It is possible to define your own metrics by creating new types that are subtypes of Metric.\n\n```\nalgorithm::Symbol\n```\n\nOne of `(:kdtree, :balltree)`. In a `kdtree`, points are recursively split into groups using hyper-planes. Therefore a KDTree only works with axis aligned metrics which are: Euclidean, Chebyshev, Minkowski and Cityblock. A *brutetree* linearly searches all points in a brute force fashion and works with any Metric. A *balltree* recursively splits points into groups bounded by hyper-spheres and works with any Metric.\n\n```\nstatic::Union{Bool, Symbol}\n```\n\nOne of `(true, false, :auto)`. Whether the input data for fitting and transform should be statically or dynamically allocated. If `true`, the data is statically allocated. If `false`, the data is dynamically allocated. If `:auto`, the data is dynamically allocated if the product of all dimensions except the last is greater than 100.\n\n```\nleafsize::Int\n```\n\nDetermines at what number of points to stop splitting the tree further. There is a trade-off between traversing the tree and having to evaluate the metric function for increasing number of points.\n\n```\nreorder::Bool\n```\n\nWhile building the tree this will put points close in distance close in memory since this helps with cache locality. In this case, a copy of the original data will be made so that the original data is left unmodified. This can have a significant impact on performance and is by default set to true.\n\n```\nparallel::Bool\n```\n\nParallelize `score` and `predict` using all threads available. The number of threads can be set with the `JULIA_NUM_THREADS` environment variable. Note: `fit` is not parallel.\n\n## Examples\n\n```julia\nusing OutlierDetection: LOFDetector, fit, transform\ndetector = LOFDetector()\nX = rand(10, 100)\nmodel, result = fit(detector, X; verbosity=0)\ntest_scores = transform(detector, model, X)\n```\n\n## References\n\n[1] Breunig, Markus M.; Kriegel, Hans-Peter; Ng, Raymond T.; Sander, Jörg (2000): LOF: Identifying Density-Based Local Outliers.\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionNeighbors.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionNeighbors"
+":name" = "LOFDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nLOFDetector(k = 5,\n            metric = Euclidean(),\n            algorithm = :kdtree,\n            leafsize = 10,\n            reorder = true,\n            parallel = false)\n```\n\nCalculate an anomaly score based on the density of an instance in comparison to its neighbors. This algorithm introduced the notion of local outliers and was developed by Breunig et al., see [1].\n\n## Parameters\n\n```\nk::Integer\n```\n\nNumber of neighbors (must be greater than 0).\n\n```\nmetric::Metric\n```\n\nThis is one of the Metric types defined in the Distances.jl package. It is possible to define your own metrics by creating new types that are subtypes of Metric.\n\n```\nalgorithm::Symbol\n```\n\nOne of `(:kdtree, :balltree)`. In a `kdtree`, points are recursively split into groups using hyper-planes. Therefore a KDTree only works with axis aligned metrics which are: Euclidean, Chebyshev, Minkowski and Cityblock. A *brutetree* linearly searches all points in a brute force fashion and works with any Metric. A *balltree* recursively splits points into groups bounded by hyper-spheres and works with any Metric.\n\n```\nstatic::Union{Bool, Symbol}\n```\n\nOne of `(true, false, :auto)`. Whether the input data for fitting and transform should be statically or dynamically allocated. If `true`, the data is statically allocated. If `false`, the data is dynamically allocated. If `:auto`, the data is dynamically allocated if the product of all dimensions except the last is greater than 100.\n\n```\nleafsize::Int\n```\n\nDetermines at what number of points to stop splitting the tree further. There is a trade-off between traversing the tree and having to evaluate the metric function for increasing number of points.\n\n```\nreorder::Bool\n```\n\nWhile building the tree this will put points close in distance close in memory since this helps with cache locality. In this case, a copy of the original data will be made so that the original data is left unmodified. This can have a significant impact on performance and is by default set to true.\n\n```\nparallel::Bool\n```\n\nParallelize `score` and `predict` using all threads available. The number of threads can be set with the `JULIA_NUM_THREADS` environment variable. Note: `fit` is not parallel.\n\n## Examples\n\n```julia\nusing OutlierDetection: LOFDetector, fit, transform\ndetector = LOFDetector()\nX = rand(10, 100)\nmodel, result = fit(detector, X; verbosity=0)\ntest_scores = transform(detector, model, X)\n```\n\n## References\n\n[1] Breunig, Markus M.; Kriegel, Hans-Peter; Ng, Raymond T.; Sander, Jörg (2000): LOF: Identifying Density-Based Local Outliers.\n"""
-":name" = "LOFDetector"
-":human_name" = "lof detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
-":hyperparameters" = "`(:k, :metric, :algorithm, :static, :leafsize, :reorder, :parallel)`"
-":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
 ":constructor" = "`nothing`"
 
 [OutlierDetectionNeighbors.KNNDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":is_wrapper" = "`false`"
+":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Symbol\")`"
+":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`true`"
-":package_name" = "OutlierDetectionNeighbors"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionNeighbors.KNNDetector"
-":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb"
+":hyperparameters" = "`(:k, :metric, :algorithm, :static, :leafsize, :reorder, :parallel, :reduction)`"
+":is_pure_julia" = "`true`"
+":human_name" = "knn detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nKNNDetector(k=5,\n            metric=Euclidean,\n            algorithm=:kdtree,\n            leafsize=10,\n            reorder=true,\n            reduction=:maximum)\n```\n\nCalculate the anomaly score of an instance based on the distance to its k-nearest neighbors.\n\n## Parameters\n\n```\nk::Integer\n```\n\nNumber of neighbors (must be greater than 0).\n\n```\nmetric::Metric\n```\n\nThis is one of the Metric types defined in the Distances.jl package. It is possible to define your own metrics by creating new types that are subtypes of Metric.\n\n```\nalgorithm::Symbol\n```\n\nOne of `(:kdtree, :balltree)`. In a `kdtree`, points are recursively split into groups using hyper-planes. Therefore a KDTree only works with axis aligned metrics which are: Euclidean, Chebyshev, Minkowski and Cityblock. A *brutetree* linearly searches all points in a brute force fashion and works with any Metric. A *balltree* recursively splits points into groups bounded by hyper-spheres and works with any Metric.\n\n```\nstatic::Union{Bool, Symbol}\n```\n\nOne of `(true, false, :auto)`. Whether the input data for fitting and transform should be statically or dynamically allocated. If `true`, the data is statically allocated. If `false`, the data is dynamically allocated. If `:auto`, the data is dynamically allocated if the product of all dimensions except the last is greater than 100.\n\n```\nleafsize::Int\n```\n\nDetermines at what number of points to stop splitting the tree further. There is a trade-off between traversing the tree and having to evaluate the metric function for increasing number of points.\n\n```\nreorder::Bool\n```\n\nWhile building the tree this will put points close in distance close in memory since this helps with cache locality. In this case, a copy of the original data will be made so that the original data is left unmodified. This can have a significant impact on performance and is by default set to true.\n\n```\nparallel::Bool\n```\n\nParallelize `score` and `predict` using all threads available. The number of threads can be set with the `JULIA_NUM_THREADS` environment variable. Note: `fit` is not parallel.\n\n```\nreduction::Symbol\n```\n\nOne of `(:maximum, :median, :mean)`. (`reduction=:maximum`) was proposed by [1]. Angiulli et al. [2] proposed sum to reduce the distances, but mean has been implemented for numerical stability.\n\n## Examples\n\n```julia\nusing OutlierDetection: KNNDetector, fit, transform\ndetector = KNNDetector()\nX = rand(10, 100)\nmodel, result = fit(detector, X; verbosity=0)\ntest_scores = transform(detector, model, X)\n```\n\n## References\n\n[1] Ramaswamy, Sridhar; Rastogi, Rajeev; Shim, Kyuseok (2000): Efficient Algorithms for Mining Outliers from Large Data Sets.\n\n[2] Angiulli, Fabrizio; Pizzuti, Clara (2002): Fast Outlier Detection in High Dimensional Spaces.\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionNeighbors.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionNeighbors"
+":name" = "KNNDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nKNNDetector(k=5,\n            metric=Euclidean,\n            algorithm=:kdtree,\n            leafsize=10,\n            reorder=true,\n            reduction=:maximum)\n```\n\nCalculate the anomaly score of an instance based on the distance to its k-nearest neighbors.\n\n## Parameters\n\n```\nk::Integer\n```\n\nNumber of neighbors (must be greater than 0).\n\n```\nmetric::Metric\n```\n\nThis is one of the Metric types defined in the Distances.jl package. It is possible to define your own metrics by creating new types that are subtypes of Metric.\n\n```\nalgorithm::Symbol\n```\n\nOne of `(:kdtree, :balltree)`. In a `kdtree`, points are recursively split into groups using hyper-planes. Therefore a KDTree only works with axis aligned metrics which are: Euclidean, Chebyshev, Minkowski and Cityblock. A *brutetree* linearly searches all points in a brute force fashion and works with any Metric. A *balltree* recursively splits points into groups bounded by hyper-spheres and works with any Metric.\n\n```\nstatic::Union{Bool, Symbol}\n```\n\nOne of `(true, false, :auto)`. Whether the input data for fitting and transform should be statically or dynamically allocated. If `true`, the data is statically allocated. If `false`, the data is dynamically allocated. If `:auto`, the data is dynamically allocated if the product of all dimensions except the last is greater than 100.\n\n```\nleafsize::Int\n```\n\nDetermines at what number of points to stop splitting the tree further. There is a trade-off between traversing the tree and having to evaluate the metric function for increasing number of points.\n\n```\nreorder::Bool\n```\n\nWhile building the tree this will put points close in distance close in memory since this helps with cache locality. In this case, a copy of the original data will be made so that the original data is left unmodified. This can have a significant impact on performance and is by default set to true.\n\n```\nparallel::Bool\n```\n\nParallelize `score` and `predict` using all threads available. The number of threads can be set with the `JULIA_NUM_THREADS` environment variable. Note: `fit` is not parallel.\n\n```\nreduction::Symbol\n```\n\nOne of `(:maximum, :median, :mean)`. (`reduction=:maximum`) was proposed by [1]. Angiulli et al. [2] proposed sum to reduce the distances, but mean has been implemented for numerical stability.\n\n## Examples\n\n```julia\nusing OutlierDetection: KNNDetector, fit, transform\ndetector = KNNDetector()\nX = rand(10, 100)\nmodel, result = fit(detector, X; verbosity=0)\ntest_scores = transform(detector, model, X)\n```\n\n## References\n\n[1] Ramaswamy, Sridhar; Rastogi, Rajeev; Shim, Kyuseok (2000): Efficient Algorithms for Mining Outliers from Large Data Sets.\n\n[2] Angiulli, Fabrizio; Pizzuti, Clara (2002): Fast Outlier Detection in High Dimensional Spaces.\n"""
-":name" = "KNNDetector"
-":human_name" = "knn detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
-":hyperparameters" = "`(:k, :metric, :algorithm, :static, :leafsize, :reorder, :parallel, :reduction)`"
-":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Symbol\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
 ":constructor" = "`nothing`"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionNeighbors.COFDetector" -":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" +":hyperparameters" = "`(:k, :metric, :algorithm, :static, :leafsize, :reorder, :parallel)`" +":is_pure_julia" = "`true`" +":human_name" = "cof detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nCOFDetector(k = 5,\n metric = Euclidean(),\n algorithm = :kdtree,\n leafsize = 10,\n reorder = true,\n parallel = false)\n```\n\nLocal outlier density based on chaining distance between graphs of neighbors, as described in [1].\n\n## Parameters\n\n```\nk::Integer\n```\n\nNumber of neighbors (must be greater than 0).\n\n```\nmetric::Metric\n```\n\nThis is one of the Metric types defined in the Distances.jl package. It is possible to define your own metrics by creating new types that are subtypes of Metric.\n\n```\nalgorithm::Symbol\n```\n\nOne of `(:kdtree, :balltree)`. In a `kdtree`, points are recursively split into groups using hyper-planes. Therefore a KDTree only works with axis aligned metrics which are: Euclidean, Chebyshev, Minkowski and Cityblock. A *brutetree* linearly searches all points in a brute force fashion and works with any Metric. A *balltree* recursively splits points into groups bounded by hyper-spheres and works with any Metric.\n\n```\nstatic::Union{Bool, Symbol}\n```\n\nOne of `(true, false, :auto)`. Whether the input data for fitting and transform should be statically or dynamically allocated. If `true`, the data is statically allocated. If `false`, the data is dynamically allocated. If `:auto`, the data is dynamically allocated if the product of all dimensions except the last is greater than 100.\n\n```\nleafsize::Int\n```\n\nDetermines at what number of points to stop splitting the tree further. There is a trade-off between traversing the tree and having to evaluate the metric function for increasing number of points.\n\n```\nreorder::Bool\n```\n\nWhile building the tree this will put points close in distance close in memory since this helps with cache locality. In this case, a copy of the original data will be made so that the original data is left unmodified. This can have a significant impact on performance and is by default set to true.\n\n```\nparallel::Bool\n```\n\nParallelize `score` and `predict` using all threads available. The number of threads can be set with the `JULIA_NUM_THREADS` environment variable. 
Note: `fit` is not parallel.\n\n## Examples\n\n```julia\nusing OutlierDetection: COFDetector, fit, transform\ndetector = COFDetector()\nX = rand(10, 100)\nmodel, result = fit(detector, X; verbosity=0)\ntest_scores = transform(detector, model, X)\n```\n\n## References\n\n[1] Tang, Jian; Chen, Zhixiang; Fu, Ada Wai-Chee; Cheung, David Wai-Lok (2002): Enhancing Effectiveness of Outlier Detections for Low Density Patterns.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionNeighbors.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OutlierDetectionNeighbors" +":name" = "COFDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nCOFDetector(k = 5,\n metric = Euclidean(),\n algorithm = :kdtree,\n leafsize = 10,\n reorder = true,\n parallel = false)\n```\n\nLocal outlier density based on chaining distance between graphs of neighbors, as described in [1].\n\n## Parameters\n\n```\nk::Integer\n```\n\nNumber of neighbors (must be greater than 0).\n\n```\nmetric::Metric\n```\n\nThis is one of the Metric types defined in the Distances.jl package. It is possible to define your own metrics by creating new types that are subtypes of Metric.\n\n```\nalgorithm::Symbol\n```\n\nOne of `(:kdtree, :balltree)`. In a `kdtree`, points are recursively split into groups using hyper-planes. Therefore a KDTree only works with axis aligned metrics which are: Euclidean, Chebyshev, Minkowski and Cityblock. A *brutetree* linearly searches all points in a brute force fashion and works with any Metric. A *balltree* recursively splits points into groups bounded by hyper-spheres and works with any Metric.\n\n```\nstatic::Union{Bool, Symbol}\n```\n\nOne of `(true, false, :auto)`. Whether the input data for fitting and transform should be statically or dynamically allocated. If `true`, the data is statically allocated. If `false`, the data is dynamically allocated. If `:auto`, the data is dynamically allocated if the product of all dimensions except the last is greater than 100.\n\n```\nleafsize::Int\n```\n\nDetermines at what number of points to stop splitting the tree further. There is a trade-off between traversing the tree and having to evaluate the metric function for increasing number of points.\n\n```\nreorder::Bool\n```\n\nWhile building the tree this will put points close in distance close in memory since this helps with cache locality. In this case, a copy of the original data will be made so that the original data is left unmodified. This can have a significant impact on performance and is by default set to true.\n\n```\nparallel::Bool\n```\n\nParallelize `score` and `predict` using all threads available. The number of threads can be set with the `JULIA_NUM_THREADS` environment variable. 
Note: `fit` is not parallel.\n\n## Examples\n\n```julia\nusing OutlierDetection: COFDetector, fit, transform\ndetector = COFDetector()\nX = rand(10, 100)\nmodel, result = fit(detector, X; verbosity=0)\ntest_scores = transform(detector, model, X)\n```\n\n## References\n\n[1] Tang, Jian; Chen, Zhixiang; Fu, Ada Wai-Chee; Cheung, David Wai-Lok (2002): Enhancing Effectiveness of Outlier Detections for Low Density Patterns.\n""" -":name" = "COFDetector" -":human_name" = "cof detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:k, :metric, :algorithm, :static, :leafsize, :reorder, :parallel)`" -":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" [SIRUS.StableRulesClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Float64\")`" +":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "SIRUS" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "SIRUS.StableForestClassifier" -":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" -":package_url" = "https://github.com/rikhuijzer/SIRUS.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nStableRulesClassifier\n```\n\nA model type for constructing a stable rules classifier, based on 
[SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableRulesClassifier = @load StableRulesClassifier pkg=SIRUS\n```\n\nDo `model = StableRulesClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `StableRulesClassifier(rng=...)`.\n\n`StableRulesClassifier` implements the explainable rule-based model based on a random forest.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least thousand trees to for a better rule selection, and in turn better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n * `max_rules::Int=10`: This is the most important hyperparameter after `lambda`. The more rules, the more accurate the model should be. If this is not the case, tune `lambda` first. However, more rules will also decrease model interpretability. So, it is important to find a good balance here. In most cases, 10 to 40 rules should provide reasonable accuracy while remaining interpretable.\n * `lambda::Float64=1.0`: The weights of the final rules are determined via a regularized regression over each rule as a binary feature. This hyperparameter specifies the strength of the ridge (L2) regularizer. SIRUS is very sensitive to the choice of this hyperparameter. Ensure that you try the full range from 10^-4 to 10^4 (e.g., 0.001, 0.01, ..., 100). When trying the range, one good check is to verify that an increase in `max_rules` increases performance. 
If this is not the case, then try a different value for `lambda`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableRules` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n""" -":name" = "StableRulesClassifier" +":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf, :max_rules, :lambda)`" +":is_pure_julia" = "`true`" ":human_name" = "stable rules classifier" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":predict"] -":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf, :max_rules, :lambda)`" -":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nStableRulesClassifier\n```\n\nA model type for constructing a stable rules classifier, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableRulesClassifier = @load StableRulesClassifier pkg=SIRUS\n```\n\nDo `model = StableRulesClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `StableRulesClassifier(rng=...)`.\n\n`StableRulesClassifier` implements the explainable rule-based model based on a random forest.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least thousand trees to for a better rule selection, and in turn better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n * `max_rules::Int=10`: This is the most important hyperparameter after `lambda`. The more rules, the more accurate the model should be. If this is not the case, tune `lambda` first. However, more rules will also decrease model interpretability. So, it is important to find a good balance here. 
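One way to run the balance check suggested here is to compare cross-validated losses over a few values of `max_rules` (a hedged sketch on synthetic data from MLJ's `make_blobs`; all values are illustrative):

```julia
using MLJ
import StableRNGs: StableRNG
StableRulesClassifier = @load StableRulesClassifier pkg=SIRUS

X, y = make_blobs(200, 3; centers=2, rng=StableRNG(1))
for max_rules in [10, 20, 40]
    model = StableRulesClassifier(max_rules=max_rules, rng=StableRNG(1))
    e = evaluate(model, X, y; resampling=CV(nfolds=3), measure=log_loss, verbosity=0)
    println("max_rules = $max_rules: log_loss = ", e.measurement[1])
end
```

If increasing `max_rules` does not improve the loss, tune `lambda` first, as advised above.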
In most cases, 10 to 40 rules should provide reasonable accuracy while remaining interpretable.\n * `lambda::Float64=1.0`: The weights of the final rules are determined via a regularized regression over each rule as a binary feature. This hyperparameter specifies the strength of the ridge (L2) regularizer. SIRUS is very sensitive to the choice of this hyperparameter. Ensure that you try the full range from 10^-4 to 10^4 (e.g., 0.001, 0.01, ..., 100). When trying the range, one good check is to verify that an increase in `max_rules` increases performance. If this is not the case, then try a different value for `lambda`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableRules` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/rikhuijzer/SIRUS.jl" +":package_name" = "SIRUS" +":name" = "StableRulesClassifier" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" [SIRUS.StableForestClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\")`" +":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "SIRUS" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "SIRUS.StableForestClassifier" -":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" +":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf)`" +":is_pure_julia" = "`true`" +":human_name" = "stable forest classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nStableForestClassifier\n```\n\nA model type 
for constructing a stable forest classifier, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableForestClassifier = @load StableForestClassifier pkg=SIRUS\n```\n\nDo `model = StableForestClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `StableForestClassifier(rng=...)`.\n\n`StableForestClassifier` implements the random forest classifier with a stabilized forest structure (Bénard et al., [2021](http://proceedings.mlr.press/v130/benard21a.html)). This stabilization increases stability when extracting rules. The impact on the predictive accuracy compared to standard random forests should be relatively small.\n\n!!! note\n Just like normal random forests, this model is not easily explainable. If you are interested in an explainable model, use the `StableRulesClassifier` or `StableRulesRegressor`.\n\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least thousand trees to for a better rule selection, and in turn better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableForest` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/rikhuijzer/SIRUS.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "SIRUS" +":name" = "StableForestClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nStableForestClassifier\n```\n\nA model type for constructing a stable forest classifier, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableForestClassifier = @load StableForestClassifier pkg=SIRUS\n```\n\nDo `model = StableForestClassifier()` to construct an instance with default hyper-parameters. 
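A minimal end-to-end sketch for this model (synthetic data; everything apart from the model itself is illustrative):

```julia
using MLJ
import StableRNGs: StableRNG
StableForestClassifier = @load StableForestClassifier pkg=SIRUS

X, y = make_moons(100; rng=StableRNG(1))   # binary classification data
mach = machine(StableForestClassifier(rng=StableRNG(1)), X, y) |> fit!
yhat = predict(mach, X)    # probabilistic predictions
predict_mode(mach, X)      # point predictions
```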
Provide keyword arguments to override hyper-parameter defaults, as in `StableForestClassifier(rng=...)`.\n\n`StableForestClassifier` implements the random forest classifier with a stabilized forest structure (Bénard et al., [2021](http://proceedings.mlr.press/v130/benard21a.html)). This stabilization increases stability when extracting rules. The impact on the predictive accuracy compared to standard random forests should be relatively small.\n\n!!! note\n Just like normal random forests, this model is not easily explainable. If you are interested in an explainable model, use the `StableRulesClassifier` or `StableRulesRegressor`.\n\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least thousand trees to for a better rule selection, and in turn better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. 
The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableForest` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n""" -":name" = "StableForestClassifier" -":human_name" = "stable forest classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":predict"] -":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf)`" -":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" [SIRUS.StableRulesRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Float64\")`" +":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "SIRUS" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "SIRUS.StableForestRegressor" -":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" +":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf, :max_rules, :lambda)`" +":is_pure_julia" = "`true`" +":human_name" = "stable rules regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nStableRulesRegressor\n```\n\nA model type for constructing a stable rules regressor, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableRulesRegressor = @load 
StableRulesRegressor pkg=SIRUS\n```\n\nDo `model = StableRulesRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `StableRulesRegressor(rng=...)`.\n\n`StableRulesRegressor` implements the explainable rule-based regression model based on a random forest.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least a thousand trees for better rule selection and, in turn, better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n * `max_rules::Int=10`: This is the most important hyperparameter after `lambda`. The more rules, the more accurate the model should be. If this is not the case, tune `lambda` first. However, more rules will also decrease model interpretability. So, it is important to find a good balance here. In most cases, 10 to 40 rules should provide reasonable accuracy while remaining interpretable.\n * `lambda::Float64=1.0`: The weights of the final rules are determined via a regularized regression over each rule as a binary feature. This hyperparameter specifies the strength of the ridge (L2) regularizer. SIRUS is very sensitive to the choice of this hyperparameter. Ensure that you try the full range from 10^-4 to 10^4 (e.g., 0.001, 0.01, ..., 100). When trying the range, one good check is to verify that an increase in `max_rules` increases performance. 
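Before tuning, a hedged end-to-end sketch (synthetic regression data; showing the fitted `StableRules` object displays the extracted rules and their weights):

```julia
using MLJ
import StableRNGs: StableRNG
StableRulesRegressor = @load StableRulesRegressor pkg=SIRUS

X, y = make_regression(200, 4; rng=StableRNG(1))
mach = machine(StableRulesRegressor(max_rules=10, rng=StableRNG(1)), X, y) |> fit!
fitted_params(mach).fitresult   # the learned rule set
predict(mach, X)                # deterministic point predictions
```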
If this is not the case, then try a different value for `lambda`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableRules` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/rikhuijzer/SIRUS.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "SIRUS" +":name" = "StableRulesRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nStableRulesRegressor\n```\n\nA model type for constructing a stable rules regressor, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableRulesRegressor = @load StableRulesRegressor pkg=SIRUS\n```\n\nDo `model = StableRulesRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `StableRulesRegressor(rng=...)`.\n\n`StableRulesRegressor` implements the explainable rule-based regression model based on a random forest.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least thousand trees to for a better rule selection, and in turn better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n * `max_rules::Int=10`: This is the most important hyperparameter after `lambda`. The more rules, the more accurate the model should be. If this is not the case, tune `lambda` first. However, more rules will also decrease model interpretability. So, it is important to find a good balance here. In most cases, 10 to 40 rules should provide reasonable accuracy while remaining interpretable.\n * `lambda::Float64=1.0`: The weights of the final rules are determined via a regularized regression over each rule as a binary feature. This hyperparameter specifies the strength of the ridge (L2) regularizer. SIRUS is very sensitive to the choice of this hyperparameter. Ensure that you try the full range from 10^-4 to 10^4 (e.g., 0.001, 0.01, ..., 100). When trying the range, one good check is to verify that an increase in `max_rules` increases performance. 
If this is not the case, then try a different value for `lambda`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableRules` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n""" -":name" = "StableRulesRegressor" -":human_name" = "stable rules regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":predict"] -":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf, :max_rules, :lambda)`" -":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" [SIRUS.StableForestRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\")`" +":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "SIRUS" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "SIRUS.StableForestRegressor" -":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" +":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf)`" +":is_pure_julia" = "`true`" +":human_name" = "stable forest regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nStableForestRegressor\n```\n\nA model type for constructing a stable forest regressor, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableForestRegressor = @load StableForestRegressor pkg=SIRUS\n```\n\nDo `model = StableForestRegressor()` to 
construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `StableForestRegressor(rng=...)`.\n\n`StableForestRegressor` implements the random forest regressor with a stabilized forest structure (Bénard et al., [2021](http://proceedings.mlr.press/v130/benard21a.html)).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least a thousand trees for better rule selection and, in turn, better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableForest` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/rikhuijzer/SIRUS.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "SIRUS" +":name" = "StableForestRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nStableForestRegressor\n```\n\nA model type for constructing a stable forest regressor, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableForestRegressor = @load StableForestRegressor pkg=SIRUS\n```\n\nDo `model = StableForestRegressor()` to construct an instance with default hyper-parameters. 
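Given the advice above to use a `StableRNG`, a sketch of a reproducible evaluation (synthetic data; values illustrative):

```julia
using MLJ
import StableRNGs: StableRNG
StableForestRegressor = @load StableForestRegressor pkg=SIRUS

X, y = make_regression(150, 3; rng=StableRNG(7))
model = StableForestRegressor(n_trees=1000, rng=StableRNG(7))
e = evaluate(model, X, y; resampling=CV(nfolds=3), measure=rms, verbosity=0)
e.measurement[1]   # identical when the script is re-run, thanks to the stable RNG
```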
Provide keyword arguments to override hyper-parameter defaults, as in `StableForestRegressor(rng=...)`.\n\n`StableForestRegressor` implements the random forest regressor with a stabilized forest structure (Bénard et al., [2021](http://proceedings.mlr.press/v130/benard21a.html)).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least thousand trees to for a better rule selection, and in turn better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableForest` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n""" -":name" = "StableForestRegressor" -":human_name" = "stable forest regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":predict"] -":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf)`" -":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" [MLJIteration.IteratedModel] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`IteratedModel`" +":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, Dict{Any, <:Real}}\", \"Any\", \"Bool\", \"Bool\", \"Union{Nothing, Expr, Symbol}\", \"Bool\")`" +":package_uuid" = 
"614be32b-d00c-4edb-bd02-1eb411ab5e55" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJIteration" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "MLJIteration.IteratedModel" -":package_uuid" = "614be32b-d00c-4edb-bd02-1eb411ab5e55" -":package_url" = "https://github.com/JuliaAI/MLJIteration.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nIteratedModel(model;\n controls=MLJIteration.DEFAULT_CONTROLS,\n resampling=Holdout(),\n measure=nothing,\n retrain=false,\n advanced_options...,\n)\n```\n\nWrap the specified supervised `model` in the specified iteration `controls`. Here `model` should support iteration, which is true if (`iteration_parameter(model)` is different from `nothing`.\n\nAvailable controls: Step(), Info(), Warn(), Error(), Callback(), WithLossDo(), WithTrainingLossesDo(), WithNumberDo(), Data(), Disjunction(), GL(), InvalidValue(), Never(), NotANumber(), NumberLimit(), NumberSinceBest(), PQ(), Patience(), Threshold(), TimeLimit(), Warmup(), WithIterationsDo(), WithEvaluationDo(), WithFittedParamsDo(), WithReportDo(), WithMachineDo(), WithModelDo(), CycleLearningRate() and Save().\n\n!!! important\n To make out-of-sample losses available to the controls, the wrapped `model` is only trained on part of the data, as iteration proceeds. The user may want to force retraining on all data after controlled iteration has finished by specifying `retrain=true`. See also \"Training\", and the `retrain` option, under \"Extended help\" below.\n\n\n# Extended help\n\n# Options\n\n * `controls=Any[IterationControl.Step(1), EarlyStopping.Patience(5), EarlyStopping.GL(2.0), EarlyStopping.TimeLimit(Dates.Millisecond(108000)), EarlyStopping.InvalidValue()]`: Controls are summarized at [https://JuliaAI.github.io/MLJ.jl/dev/getting_started/](https://JuliaAI.github.io/MLJ.jl/dev/controlling_iterative_models/) but query individual doc-strings for details and advanced options. For creating your own controls, refer to the documentation just cited.\n * `resampling=Holdout(fraction_train=0.7)`: The default resampling holds back 30% of data for computing an out-of-sample estimate of performance (the \"loss\") for loss-based controls such as `WithLossDo`. Specify `resampling=nothing` if all data is to be used for controlled iteration, with each out-of-sample loss replaced by the most recent training loss, assuming this is made available by the model (`supports_training_losses(model) == true`). If the model does not report a training loss, you can use `resampling=InSample()` instead. Otherwise, `resampling` must have type `Holdout` or be a vector with one element of the form `(train_indices, test_indices)`.\n * `measure=nothing`: StatisticalMeasures.jl compatible measure for estimating model performance (the \"loss\", but the orientation is immaterial - i.e., this could be a score). 
Inferred by default. Ignored if `resampling=nothing`.\n * `retrain=false`: If `retrain=true` or `resampling=nothing`, `iterated_model` behaves exactly like the original `model` but with the iteration parameter automatically selected (\"learned\"). That is, the model is retrained on *all* available data, using the same number of iterations, once controlled iteration has stopped. This is typically desired if wrapping the iterated model further, or when inserting in a pipeline or other composite model. If `retrain=false` (default) and `resampling isa Holdout`, then `iterated_model` behaves like the original model trained on a subset of the provided data.\n * `weights=nothing`: per-observation weights to be passed to `measure` where supported; if unspecified, these are understood to be uniform.\n * `class_weights=nothing`: class-weights to be passed to `measure` where supported; if unspecified, these are understood to be uniform.\n * `operation=nothing`: Operation, such as `predict` or `predict_mode`, for computing target values, or proxy target values, for consumption by `measure`; automatically inferred by default.\n * `check_measure=true`: Specify `false` to override checks on `measure` for compatibility with the training data.\n * `iteration_parameter=nothing`: A symbol, such as `:epochs`, naming the iteration parameter of `model`; inferred by default. Note that the actual value of the iteration parameter in the supplied `model` is ignored; only the value of an internal clone is mutated during training the wrapped model.\n * `cache=true`: Whether or not model-specific representations of data are cached in between iteration parameter increments; specify `cache=false` to prioritize memory over speed.\n\n# Training\n\nTraining an instance `iterated_model` of `IteratedModel` on some `data` (by binding to a machine and calling `fit!`, for example) performs the following actions:\n\n * Assuming `resampling !== nothing`, the `data` is split into *train* and *test* sets, according to the specified `resampling` strategy.\n * A clone of the wrapped model, `model` is bound to the train data in an internal machine, `train_mach`. If `resampling === nothing`, all data is used instead. This machine is the object to which controls are applied. For example, `Callback(fitted_params |> print)` will print the value of `fitted_params(train_mach)`.\n * The iteration parameter of the clone is set to `0`.\n * The specified `controls` are repeatedly applied to `train_mach` in sequence, until one of the controls triggers a stop. Loss-based controls (eg, `Patience()`, `GL()`, `Threshold(0.001)`) use an out-of-sample loss, obtained by applying `measure` to predictions and the test target values. (Specifically, these predictions are those returned by `operation(train_mach)`.) If `resampling === nothing` then the most recent training loss is used instead. Some controls require *both* out-of-sample and training losses (eg, `PQ()`).\n * Once a stop has been triggered, a clone of `model` is bound to all `data` in a machine called `mach_production` below, unless `retrain == false` (true by default) or `resampling === nothing`, in which case `mach_production` coincides with `train_mach`.\n\n# Prediction\n\nCalling `predict(mach, Xnew)` in the example above returns `predict(mach_production, Xnew)`. 
Similar similar statements hold for `predict_mean`, `predict_mode`, `predict_median`.\n\n# Controls that mutate parameters\n\nA control is permitted to mutate the fields (hyper-parameters) of `train_mach.model` (the clone of `model`). For example, to mutate a learning rate one might use the control\n\n```\nCallback(mach -> mach.model.eta = 1.05*mach.model.eta)\n```\n\nHowever, unless `model` supports warm restarts with respect to changes in that parameter, this will trigger retraining of `train_mach` from scratch, with a different training outcome, which is not recommended.\n\n# Warm restarts\n\nIn the following example, the second `fit!` call will not restart training of the internal `train_mach`, assuming `model` supports warm restarts:\n\n```julia\niterated_model = IteratedModel(\n model,\n controls = [Step(1), NumberLimit(100)],\n)\nmach = machine(iterated_model, X, y)\nfit!(mach) # train for 100 iterations\niterated_model.controls = [Step(1), NumberLimit(50)],\nfit!(mach) # train for an *extra* 50 iterations\n```\n\nMore generally, if `iterated_model` is mutated and `fit!(mach)` is called again, then a warm restart is attempted if the only parameters to change are `model` or `controls` or both.\n\nSpecifically, `train_mach.model` is mutated to match the current value of `iterated_model.model` and the iteration parameter of the latter is updated to the last value used in the preceding `fit!(mach)` call. Then repeated application of the (updated) controls begin anew.\n""" -":name" = "IteratedModel" +":hyperparameters" = "`(:model, :controls, :resampling, :measure, :weights, :class_weights, :operation, :retrain, :check_measure, :iteration_parameter, :cache)`" +":is_pure_julia" = "`false`" ":human_name" = "probabilistic iterated model" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :controls, :resampling, :measure, :weights, :class_weights, :operation, :retrain, :check_measure, :iteration_parameter, :cache)`" -":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, Dict{Any, <:Real}}\", \"Any\", \"Bool\", \"Bool\", \"Union{Nothing, Expr, Symbol}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`IteratedModel`" - -[PartitionedLS.PartLS] -":input_scitype" = "`Union{ScientificTypesBase.Table{AbstractVector{ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{AbstractVector{ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":docstring" = """```\nIteratedModel(model;\n controls=MLJIteration.DEFAULT_CONTROLS,\n resampling=Holdout(),\n measure=nothing,\n retrain=false,\n advanced_options...,\n)\n```\n\nWrap the specified supervised `model` in 
the specified iteration `controls`. Here `model` should support iteration, which is true if `iteration_parameter(model)` is different from `nothing`.\n\nAvailable controls: Step(), Info(), Warn(), Error(), Callback(), WithLossDo(), WithTrainingLossesDo(), WithNumberDo(), Data(), Disjunction(), GL(), InvalidValue(), Never(), NotANumber(), NumberLimit(), NumberSinceBest(), PQ(), Patience(), Threshold(), TimeLimit(), Warmup(), WithIterationsDo(), WithEvaluationDo(), WithFittedParamsDo(), WithReportDo(), WithMachineDo(), WithModelDo(), CycleLearningRate() and Save().\n\n!!! important\n To make out-of-sample losses available to the controls, the wrapped `model` is only trained on part of the data, as iteration proceeds. The user may want to force retraining on all data after controlled iteration has finished by specifying `retrain=true`. See also \"Training\", and the `retrain` option, under \"Extended help\" below.\n\n\n# Extended help\n\n# Options\n\n * `controls=Any[IterationControl.Step(1), EarlyStopping.Patience(5), EarlyStopping.GL(2.0), EarlyStopping.TimeLimit(Dates.Millisecond(108000)), EarlyStopping.InvalidValue()]`: Controls are summarized at [https://JuliaAI.github.io/MLJ.jl/dev/controlling_iterative_models/](https://JuliaAI.github.io/MLJ.jl/dev/controlling_iterative_models/) but query individual doc-strings for details and advanced options. For creating your own controls, refer to the documentation just cited.\n * `resampling=Holdout(fraction_train=0.7)`: The default resampling holds back 30% of data for computing an out-of-sample estimate of performance (the \"loss\") for loss-based controls such as `WithLossDo`. Specify `resampling=nothing` if all data is to be used for controlled iteration, with each out-of-sample loss replaced by the most recent training loss, assuming this is made available by the model (`supports_training_losses(model) == true`). If the model does not report a training loss, you can use `resampling=InSample()` instead. Otherwise, `resampling` must have type `Holdout` or be a vector with one element of the form `(train_indices, test_indices)`.\n * `measure=nothing`: StatisticalMeasures.jl compatible measure for estimating model performance (the \"loss\", but the orientation is immaterial - i.e., this could be a score). Inferred by default. Ignored if `resampling=nothing`.\n * `retrain=false`: If `retrain=true` or `resampling=nothing`, `iterated_model` behaves exactly like the original `model` but with the iteration parameter automatically selected (\"learned\"). That is, the model is retrained on *all* available data, using the same number of iterations, once controlled iteration has stopped. This is typically desired if wrapping the iterated model further, or when inserting in a pipeline or other composite model. 
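The `retrain=true` use case just described, sketched for a pipeline (model choice illustrative, as before):

```julia
using MLJ
EvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees

imodel = IteratedModel(
    EvoTreeRegressor();
    measure = rms,
    controls = [Step(10), Patience(3)],
    retrain = true,   # refit on *all* data once iteration stops
)
pipe = Standardizer() |> imodel   # the composite sees the retrained wrap
X, y = make_regression(300, 4)
mach = machine(pipe, X, y) |> fit!
```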
If `retrain=false` (default) and `resampling isa Holdout`, then `iterated_model` behaves like the original model trained on a subset of the provided data.\n * `weights=nothing`: per-observation weights to be passed to `measure` where supported; if unspecified, these are understood to be uniform.\n * `class_weights=nothing`: class-weights to be passed to `measure` where supported; if unspecified, these are understood to be uniform.\n * `operation=nothing`: Operation, such as `predict` or `predict_mode`, for computing target values, or proxy target values, for consumption by `measure`; automatically inferred by default.\n * `check_measure=true`: Specify `false` to override checks on `measure` for compatibility with the training data.\n * `iteration_parameter=nothing`: A symbol, such as `:epochs`, naming the iteration parameter of `model`; inferred by default. Note that the actual value of the iteration parameter in the supplied `model` is ignored; only the value of an internal clone is mutated during training of the wrapped model.\n * `cache=true`: Whether or not model-specific representations of data are cached in between iteration parameter increments; specify `cache=false` to prioritize memory over speed.\n\n# Training\n\nTraining an instance `iterated_model` of `IteratedModel` on some `data` (by binding to a machine and calling `fit!`, for example) performs the following actions:\n\n * Assuming `resampling !== nothing`, the `data` is split into *train* and *test* sets, according to the specified `resampling` strategy.\n * A clone of the wrapped model, `model`, is bound to the train data in an internal machine, `train_mach`. If `resampling === nothing`, all data is used instead. This machine is the object to which controls are applied. For example, `Callback(fitted_params |> print)` will print the value of `fitted_params(train_mach)`.\n * The iteration parameter of the clone is set to `0`.\n * The specified `controls` are repeatedly applied to `train_mach` in sequence, until one of the controls triggers a stop. Loss-based controls (eg, `Patience()`, `GL()`, `Threshold(0.001)`) use an out-of-sample loss, obtained by applying `measure` to predictions and the test target values. (Specifically, these predictions are those returned by `operation(train_mach)`.) If `resampling === nothing` then the most recent training loss is used instead. Some controls require *both* out-of-sample and training losses (eg, `PQ()`).\n * Once a stop has been triggered, a clone of `model` is bound to all `data` in a machine called `mach_production` below, unless `retrain == false` (the default) or `resampling === nothing`, in which case `mach_production` coincides with `train_mach`.\n\n# Prediction\n\nCalling `predict(mach, Xnew)` in the example above returns `predict(mach_production, Xnew)`. Similar statements hold for `predict_mean`, `predict_mode`, `predict_median`.\n\n# Controls that mutate parameters\n\nA control is permitted to mutate the fields (hyper-parameters) of `train_mach.model` (the clone of `model`). 
For example, to mutate a learning rate one might use the control\n\n```\nCallback(mach -> mach.model.eta = 1.05*mach.model.eta)\n```\n\nHowever, unless `model` supports warm restarts with respect to changes in that parameter, this will trigger retraining of `train_mach` from scratch, with a different training outcome, which is not recommended.\n\n# Warm restarts\n\nIn the following example, the second `fit!` call will not restart training of the internal `train_mach`, assuming `model` supports warm restarts:\n\n```julia\niterated_model = IteratedModel(\n model,\n controls = [Step(1), NumberLimit(100)],\n)\nmach = machine(iterated_model, X, y)\nfit!(mach) # train for 100 iterations\niterated_model.controls = [Step(1), NumberLimit(50)],\nfit!(mach) # train for an *extra* 50 iterations\n```\n\nMore generally, if `iterated_model` is mutated and `fit!(mach)` is called again, then a warm restart is attempted if the only parameters to change are `model` or `controls` or both.\n\nSpecifically, `train_mach.model` is mutated to match the current value of `iterated_model.model` and the iteration parameter of the latter is updated to the last value used in the preceding `fit!(mach)` call. Then repeated application of the (updated) controls begin anew.\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJIteration.jl" +":package_name" = "MLJIteration" +":name" = "IteratedModel" ":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "PartitionedLS" -":package_license" = "MIT" -":load_path" = "PartitionedLS.PartLS" -":package_uuid" = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f" -":package_url" = "https://github.com/ml-unito/PartitionedLS.jl.git" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nPartLS\n```\n\nA model type for fitting a partitioned least squares model to data. Both an MLJ and native interface are provided.\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using\n\n```\nPartLS = @load PartLS pkg=PartitionedLS\n```\n\nConstruct an instance with default hyper-parameters using the syntax `model = PartLS()`. Provide keyword arguments to override hyper-parameter defaults, as in `model = PartLS(P=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any matrix or table with `Continuous` element scitype. Check column scitypes of a table `X` with `schema(X)`.\n * `y`: any vector with `Continuous` element scitype. Check scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach)`.\n\n## Hyper-parameters\n\n * `Optimizer`: the optimization algorithm to use. It can be `Opt`, `Alt` or `BnB` (names exported by `PartitionedLS.jl`).\n * `P`: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element `P_{k, i} = 1` if feature `i` belongs to partition `k`.\n * `η`: the regularization parameter. It controls the strength of the regularization.\n * `ϵ`: the tolerance parameter. It is used to determine when the Alt optimization algorithm has converged. Only used by the `Alt` algorithm.\n * `T`: the maximum number of iterations. It is used to determine when to stop the Alt optimization algorithm has converged. 
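For the `Alt`-specific options (`ϵ`, `T`) in this list, a hedged sketch using the native interface from the Examples further below (passing `ϵ` and `T` as `fit` keywords is an assumption based on the "other `fit` keyword options" remark there):

```julia
using PartitionedLS

X = [1. 2. 3.; 3. 3. 4.; 8. 1. 3.; 5. 3. 1.]
y = [1., 1., 2., 3.]
P = [1 0; 1 0; 0 1]   # feature i is in partition k iff P[i, k] == 1

# alternating least squares with an explicit tolerance and iteration cap
result = fit(Alt, X, y, P, η = 0.1, ϵ = 1e-6, T = 100)  # assumed keywords
y_hat = predict(result.model, X)
```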
Only used by the `Alt` algorithm.\n * `rng`: the random number generator to use.\n\n * If `nothing`, the global random number generator `rand` is used.\n * If an integer, the global number generator `rand` is used after seeding it with the given integer.\n * If an object of type `AbstractRNG`, the given random number generator is used.\n\n## Operations\n\n * `predict(mach, Xnew)`: return the predictions of the model on new data `Xnew`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `α`: the values of the α variables. For each partition `k`, it holds the values of the α variables are such that $\\sum_{i \\in P_k} \\alpha_{k} = 1$.\n * `β`: the values of the β variables. For each partition `k`, `β_k` is the coefficient that multiplies the features in the k-th partition.\n * `t`: the intercept term of the model.\n * `P`: the partition matrix. It is a binary matrix where each row corresponds to a partition and each column corresponds to a feature. The element `P_{k, i} = 1` if feature `i` belongs to partition `k`.\n\n## Examples\n\n```julia\nPartLS = @load PartLS pkg=PartitionedLS\n\nX = [[1. 2. 3.];\n [3. 3. 4.];\n [8. 1. 3.];\n [5. 3. 1.]]\n\ny = [1.;\n 1.;\n 2.;\n 3.]\n\nP = [[1 0];\n [1 0];\n [0 1]]\n\n\nmodel = PartLS(P=P)\nmach = machine(model, X, y) |> fit!\n\n# predictions on the training set:\npredict(mach, X)\n\n```\n\n# Native Interface\n\n```\nusing PartitionedLS\n\nX = [[1. 2. 3.];\n [3. 3. 4.];\n [8. 1. 3.];\n [5. 3. 1.]]\n\ny = [1.;\n 1.;\n 2.;\n 3.]\n\nP = [[1 0];\n [1 0];\n [0 1]]\n\n\n# fit using the optimal algorithm\nresult = fit(Opt, X, y, P, η = 0.0)\ny_hat = predict(result.model, X)\n```\n\nFor other `fit` keyword options, refer to the \"Hyper-parameters\" section for the MLJ interface.\n""" -":name" = "PartLS" -":human_name" = "part ls" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:Optimizer, :P, :η, :ϵ, :T, :rng)`" -":hyperparameter_types" = "`(\"Union{Type{PartitionedLS.Alt}, Type{PartitionedLS.BnB}, Type{PartitionedLS.Opt}}\", \"Matrix{Int64}\", \"AbstractFloat\", \"AbstractFloat\", \"Int64\", \"Union{Nothing, Int64, Random.AbstractRNG}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJLinearModels.QuantileRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = 
"MLJLinearModels" -":package_license" = "MIT" -":load_path" = "MLJLinearModels.QuantileRegressor" -":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" -":package_url" = "https://github.com/alan-turing-institute/MLJLinearModels.jl" +":is_wrapper" = "`true`" + +[MLJTSVDInterface.TSVDTransformer] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Union{Int64, Random.AbstractRNG}\")`" +":package_uuid" = "9449cd9e-2762-5aa3-a617-5413e99d722e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTSVDInterface.TSVDTransformer" +":hyperparameters" = "`(:nvals, :maxiter, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "truncated SVD transformer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = "Truncated SVD dimensionality reduction" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaLinearAlgebra/TSVD.jl" +":package_name" = "TSVD" +":name" = "TSVDTransformer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nQuantileRegressor\n```\n\nA model type for constructing a quantile regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nQuantileRegressor = @load QuantileRegressor pkg=MLJLinearModels\n```\n\nDo `model = QuantileRegressor()` to construct an instance with default hyper-parameters.\n\nThis model coincides with [`RobustRegressor`](@ref), with the exception that the robust loss, `rho`, is fixed to `QuantileRho(delta)`, where `delta` is a new hyperparameter.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `delta::Real`: parameterizes the `QuantileRho` function (indicating the quantile to use with default `0.5` for the median regression) Default: 0.5\n * `lambda::Real`: strength of the regularizer if `penalty` is `:l2` or `:l1`. Strength of the L2 regularizer if `penalty` is `:en`. Default: 1.0\n * `gamma::Real`: strength of the L1 regularizer if `penalty` is `:en`. Default: 0.0\n * `penalty::Union{String, Symbol}`: the penalty to use, either `:l2`, `:l1`, `:en` (elastic net) or `:none`. Default: :l2\n * `fit_intercept::Bool`: whether to fit the intercept or not. 
Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `IWLSCG`, if `penalty = :l2`, and `ProxGrad` otherwise.\n\n If `solver = nothing` (default) then `LBFGS()` is used, if `penalty = :l2`, and otherwise `ProxGrad(accel=true)` (FISTA) is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) = ProxGrad(accel=false, kwargs...)` Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(QuantileRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`RobustRegressor`](@ref), [`HuberRegressor`](@ref).\n""" -":name" = "QuantileRegressor" -":human_name" = "quantile regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":fit", ":fitted_params", ":predict", ":QuantileRegressor"] -":hyperparameters" = "`(:delta, :lambda, :gamma, :penalty, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" -":hyperparameter_types" = "`(\"Real\", \"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[MLJLinearModels.LogisticClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[PartitionedLS.PartLS] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Type{PartitionedLS.Alt}, Type{PartitionedLS.BnB}, Type{PartitionedLS.Opt}}\", \"Matrix{Int64}\", \"AbstractFloat\", \"AbstractFloat\", \"Int64\", \"Union{Nothing, Int64, Random.AbstractRNG}\")`" +":package_uuid" = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{AbstractVector{ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where 
_s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "PartitionedLS.PartLS" +":hyperparameters" = "`(:Optimizer, :P, :η, :ϵ, :T, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "part ls" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nPartLS\n```\n\nA model type for fitting a partitioned least squares model to data. Both an MLJ and native interface are provided.\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using\n\n```\nPartLS = @load PartLS pkg=PartitionedLS\n```\n\nConstruct an instance with default hyper-parameters using the syntax `model = PartLS()`. Provide keyword arguments to override hyper-parameter defaults, as in `model = PartLS(P=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any matrix or table with `Continuous` element scitype. Check column scitypes of a table `X` with `schema(X)`.\n * `y`: any vector with `Continuous` element scitype. Check scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach)`.\n\n## Hyper-parameters\n\n * `Optimizer`: the optimization algorithm to use. It can be `Opt`, `Alt` or `BnB` (names exported by `PartitionedLS.jl`).\n * `P`: the partition matrix. It is a binary matrix where each row corresponds to a feature and each column corresponds to a partition. The element `P_{i, k} = 1` if feature `i` belongs to partition `k`.\n * `η`: the regularization parameter. It controls the strength of the regularization.\n * `ϵ`: the tolerance parameter. It is used to determine when the `Alt` optimization algorithm has converged. Only used by the `Alt` algorithm.\n * `T`: the maximum number of iterations. It is used to determine when to stop the `Alt` optimization algorithm. Only used by the `Alt` algorithm.\n * `rng`: the random number generator to use.\n\n * If `nothing`, the global random number generator `rand` is used.\n * If an integer, the global random number generator `rand` is used after seeding it with the given integer.\n * If an object of type `AbstractRNG`, the given random number generator is used.\n\n## Operations\n\n * `predict(mach, Xnew)`: return the predictions of the model on new data `Xnew`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `α`: the values of the α variables. For each partition `k`, it holds that the values of the α variables satisfy $\sum_{i \in P_k} \alpha_i = 1$.\n * `β`: the values of the β variables. For each partition `k`, `β_k` is the coefficient that multiplies the features in the k-th partition.\n * `t`: the intercept term of the model.\n * `P`: the partition matrix. It is a binary matrix where each row corresponds to a feature and each column corresponds to a partition. The element `P_{i, k} = 1` if feature `i` belongs to partition `k`.\n\n## Examples\n\n```julia\nPartLS = @load PartLS pkg=PartitionedLS\n\nX = [[1. 2. 3.];\n [3. 3. 4.];\n [8. 1. 3.];\n [5. 3. 1.]]\n\ny = [1.;\n 1.;\n 2.;\n 3.]\n\nP = [[1 0];\n [1 0];\n [0 1]]\n\n\nmodel = PartLS(P=P)\nmach = machine(model, X, y) |> fit!\n\n# predictions on the training set:\npredict(mach, X)\n\n```\n\n# Native Interface\n\n```\nusing PartitionedLS\n\nX = [[1. 2. 3.];\n [3. 3. 4.];\n [8. 1. 3.];\n [5. 3. 
1.]]\n\ny = [1.;\n 1.;\n 2.;\n 3.]\n\nP = [[1 0];\n [1 0];\n [0 1]]\n\n\n# fit using the optimal algorithm\nresult = fit(Opt, X, y, P, η = 0.0)\ny_hat = predict(result.model, X)\n```\n\nFor other `fit` keyword options, refer to the \"Hyper-parameters\" section for the MLJ interface.\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/ml-unito/PartitionedLS.jl.git" +":package_name" = "PartitionedLS" +":name" = "PartLS" ":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{AbstractVector{ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[MLJLinearModels.QuantileRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" +":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJLinearModels.QuantileRegressor" +":hyperparameters" = "`(:delta, :lambda, :gamma, :penalty, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" ":is_pure_julia" = "`true`" +":human_name" = "quantile regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nQuantileRegressor\n```\n\nA model type for constructing a quantile regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nQuantileRegressor = @load QuantileRegressor pkg=MLJLinearModels\n```\n\nDo `model = QuantileRegressor()` to construct an instance with default hyper-parameters.\n\nThis model coincides with [`RobustRegressor`](@ref), with the exception that the robust loss, `rho`, is fixed to `QuantileRho(delta)`, where `delta` is a new hyperparameter.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. 
\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `delta::Real`: parameterizes the `QuantileRho` function (indicating the quantile to use, with default `0.5` for the median regression). Default: 0.5\n * `lambda::Real`: strength of the regularizer if `penalty` is `:l2` or `:l1`. Strength of the L2 regularizer if `penalty` is `:en`. Default: 1.0\n * `gamma::Real`: strength of the L1 regularizer if `penalty` is `:en`. Default: 0.0\n * `penalty::Union{String, Symbol}`: the penalty to use, either `:l2`, `:l1`, `:en` (elastic net) or `:none`. Default: :l2\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `IWLSCG`, if `penalty = :l2`, and `ProxGrad` otherwise.\n\n If `solver = nothing` (default) then `LBFGS()` is used, if `penalty = :l2`, and otherwise `ProxGrad(accel=true)` (FISTA) is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) = ProxGrad(accel=false, kwargs...)` Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(QuantileRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`RobustRegressor`](@ref), [`HuberRegressor`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/alan-turing-institute/MLJLinearModels.jl" ":package_name" = "MLJLinearModels" +":name" = "QuantileRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":predict", ":QuantileRegressor"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[MLJLinearModels.LogisticClassifier] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" +":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" 
+":prediction_type" = ":probabilistic" ":load_path" = "MLJLinearModels.LogisticClassifier" -":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameters" = "`(:lambda, :gamma, :penalty, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" +":is_pure_julia" = "`true`" +":human_name" = "logistic classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLogisticClassifier\n```\n\nA model type for constructing a logistic classifier, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels\n```\n\nDo `model = LogisticClassifier()` to construct an instance with default hyper-parameters.\n\nThis model is more commonly known as \"logistic regression\". It is a standard classifier for both binary and multiclass classification. The objective function applies either a logistic loss (binary target) or multinomial (softmax) loss, and has a mixed L1/L2 penalty:\n\n$L(y, Xθ) + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁$.\n\nHere $L$ is either `MLJLinearModels.LogisticLoss` or `MLJLinearModels.MultiClassLoss`, $λ$ and $γ$ indicate the strength of the L2 (resp. L1) regularization components and $n$ is the number of training observations.\n\nWith `scale_penalty_with_samples = false` the objective function is instead\n\n$L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁$.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `lambda::Real`: strength of the regularizer if `penalty` is `:l2` or `:l1` and strength of the L2 regularizer if `penalty` is `:en`. Default: eps()\n * `gamma::Real`: strength of the L1 regularizer if `penalty` is `:en`. Default: 0.0\n * `penalty::Union{String, Symbol}`: the penalty to use, either `:l2`, `:l1`, `:en` (elastic net) or `:none`. Default: :l2\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of samples. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `Newton`, `NewtonCG`, `ProxGrad`; but subject to the following restrictions:\n\n * If `penalty = :l2`, `ProxGrad` is disallowed. Otherwise, `ProxGrad` is the only option.\n * Unless `scitype(y) <: Finite{2}` (binary target) `Newton` is disallowed.\n\n If `solver = nothing` (default) then `ProxGrad(accel=true)` (FISTA) is used, unless `gamma = 0`, in which case `LBFGS()` is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) 
= ProxGrad(accel=false, kwargs...)` Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_blobs(centers = 2)\nmach = fit!(machine(LogisticClassifier(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`MultinomialClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJLinearModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJLinearModels" +":name" = "LogisticClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLogisticClassifier\n```\n\nA model type for constructing a logistic classifier, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels\n```\n\nDo `model = LogisticClassifier()` to construct an instance with default hyper-parameters.\n\nThis model is more commonly known as \"logistic regression\". It is a standard classifier for both binary and multiclass classification. The objective function applies either a logistic loss (binary target) or multinomial (softmax) loss, and has a mixed L1/L2 penalty:\n\n$L(y, Xθ) + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁$.\n\nHere $L$ is either `MLJLinearModels.LogisticLoss` or `MLJLinearModels.MultiClassLoss`, $λ$ and $γ$ indicate the strength of the L2 (resp. L1) regularization components and $n$ is the number of training observations.\n\nWith `scale_penalty_with_samples = false` the objective function is instead\n\n$L(y, Xθ) + λ|θ|₂²/2 + γ|θ|₁$.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `lambda::Real`: strength of the regularizer if `penalty` is `:l2` or `:l1` and strength of the L2 regularizer if `penalty` is `:en`. Default: eps()\n * `gamma::Real`: strength of the L1 regularizer if `penalty` is `:en`. Default: 0.0\n * `penalty::Union{String, Symbol}`: the penalty to use, either `:l2`, `:l1`, `:en` (elastic net) or `:none`. Default: :l2\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of samples. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `Newton`, `NewtonCG`, `ProxGrad`; but subject to the following restrictions:\n\n * If `penalty = :l2`, `ProxGrad` is disallowed. Otherwise, `ProxGrad` is the only option.\n * Unless `scitype(y) <: Finite{2}` (binary target) `Newton` is disallowed.\n\n If `solver = nothing` (default) then `ProxGrad(accel=true)` (FISTA) is used, unless `gamma = 0`, in which case `LBFGS()` is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) 
= ProxGrad(accel=false, kwargs...)` Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_blobs(centers = 2)\nmach = fit!(machine(LogisticClassifier(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`MultinomialClassifier`](@ref).\n""" -":name" = "LogisticClassifier" -":human_name" = "logistic classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":fit", ":fitted_params", ":predict", ":LogisticClassifier"] -":hyperparameters" = "`(:lambda, :gamma, :penalty, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" -":hyperparameter_types" = "`(\"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJLinearModels.MultinomialClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" +":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJLinearModels" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "MLJLinearModels.MultinomialClassifier" -":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameters" = "`(:lambda, :gamma, :penalty, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" +":is_pure_julia" = "`true`" +":human_name" = "multinomial classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultinomialClassifier\n```\n\nA model type for constructing a multinomial classifier, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported 
using\n\n```\nMultinomialClassifier = @load MultinomialClassifier pkg=MLJLinearModels\n```\n\nDo `model = MultinomialClassifier()` to construct an instance with default hyper-parameters.\n\nThis model coincides with [`LogisticClassifier`](@ref), except certain optimizations possible in the special binary case will not be applied. Its hyperparameters are identical.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `lambda::Real`: strength of the regularizer if `penalty` is `:l2` or `:l1`. Strength of the L2 regularizer if `penalty` is `:en`. Default: eps()\n * `gamma::Real`: strength of the L1 regularizer if `penalty` is `:en`. Default: 0.0\n * `penalty::Union{String, Symbol}`: the penalty to use, either `:l2`, `:l1`, `:en` (elastic net) or `:none`. Default: :l2\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of samples. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `NewtonCG`, `ProxGrad`; but subject to the following restrictions:\n\n * If `penalty = :l2`, `ProxGrad` is disallowed. Otherwise, `ProxGrad` is the only option.\n * Unless `scitype(y) <: Finite{2}` (binary target) `Newton` is disallowed.\n\n If `solver = nothing` (default) then `ProxGrad(accel=true)` (FISTA) is used, unless `gamma = 0`, in which case `LBFGS()` is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) = ProxGrad(accel=false, kwargs...)` Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_blobs(centers = 3)\nmach = fit!(machine(MultinomialClassifier(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`LogisticClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJLinearModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJLinearModels" +":name" = "MultinomialClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMultinomialClassifier\n```\n\nA model type for constructing a multinomial classifier, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultinomialClassifier = @load MultinomialClassifier pkg=MLJLinearModels\n```\n\nDo `model = MultinomialClassifier()` to construct an instance with default hyper-parameters.\n\nThis model coincides with [`LogisticClassifier`](@ref), except certain optimizations possible in the special binary case will not be applied. 
Its hyperparameters are identical.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `lambda::Real`: strength of the regularizer if `penalty` is `:l2` or `:l1`. Strength of the L2 regularizer if `penalty` is `:en`. Default: eps()\n * `gamma::Real`: strength of the L1 regularizer if `penalty` is `:en`. Default: 0.0\n * `penalty::Union{String, Symbol}`: the penalty to use, either `:l2`, `:l1`, `:en` (elastic net) or `:none`. Default: :l2\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of samples. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `NewtonCG`, `ProxGrad`; but subject to the following restrictions:\n\n * If `penalty = :l2`, `ProxGrad` is disallowed. Otherwise, `ProxGrad` is the only option.\n * Unless `scitype(y) <: Finite{2}` (binary target) `Newton` is disallowed.\n\n If `solver = nothing` (default) then `ProxGrad(accel=true)` (FISTA) is used, unless `gamma = 0`, in which case `LBFGS()` is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) = ProxGrad(accel=false, kwargs...)` Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_blobs(centers = 3)\nmach = fit!(machine(MultinomialClassifier(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`LogisticClassifier`](@ref).\n""" -":name" = "MultinomialClassifier" -":human_name" = "multinomial classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":fit", ":fitted_params", ":predict", ":MultinomialClassifier"] -":hyperparameters" = "`(:lambda, :gamma, :penalty, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" -":hyperparameter_types" = "`(\"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJLinearModels.LADRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = 
"`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" +":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJLinearModels" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "MLJLinearModels.LADRegressor" -":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameters" = "`(:lambda, :gamma, :penalty, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" +":is_pure_julia" = "`true`" +":human_name" = "least absolute deviation regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLADRegressor\n```\n\nA model type for constructing a lad regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLADRegressor = @load LADRegressor pkg=MLJLinearModels\n```\n\nDo `model = LADRegressor()` to construct an instance with default hyper-parameters.\n\nLeast absolute deviation regression is a linear model with objective function\n\n$∑ρ(Xθ - y) + n⋅λ|θ|₂² + n⋅γ|θ|₁$\n\nwhere $ρ$ is the absolute loss and $n$ is the number of observations.\n\nIf `scale_penalty_with_samples = false` the objective function is instead\n\n$∑ρ(Xθ - y) + λ|θ|₂² + γ|θ|₁$.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\nSee also `RobustRegressor`.\n\n## Parameters\n\n * `lambda::Real`: strength of the regularizer if `penalty` is `:l2` or `:l1`. Strength of the L2 regularizer if `penalty` is `:en`. Default: 1.0\n * `gamma::Real`: strength of the L1 regularizer if `penalty` is `:en`. Default: 0.0\n * `penalty::Union{String, Symbol}`: the penalty to use, either `:l2`, `:l1`, `:en` (elastic net) or `:none`. Default: :l2\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. 
Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `IWLSCG`, if `penalty = :l2`, and `ProxGrad` otherwise.\n\n If `solver = nothing` (default) then `LBFGS()` is used, if `penalty = :l2`, and otherwise `ProxGrad(accel=true)` (FISTA) is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) = ProxGrad(accel=false, kwargs...)` Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(LADRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJLinearModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJLinearModels" +":name" = "LADRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLADRegressor\n```\n\nA model type for constructing a lad regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLADRegressor = @load LADRegressor pkg=MLJLinearModels\n```\n\nDo `model = LADRegressor()` to construct an instance with default hyper-parameters.\n\nLeast absolute deviation regression is a linear model with objective function\n\n$∑ρ(Xθ - y) + n⋅λ|θ|₂² + n⋅γ|θ|₁$\n\nwhere $ρ$ is the absolute loss and $n$ is the number of observations.\n\nIf `scale_penalty_with_samples = false` the objective function is instead\n\n$∑ρ(Xθ - y) + λ|θ|₂² + γ|θ|₁$.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\nSee also `RobustRegressor`.\n\n## Parameters\n\n * `lambda::Real`: strength of the regularizer if `penalty` is `:l2` or `:l1`. Strength of the L2 regularizer if `penalty` is `:en`. Default: 1.0\n * `gamma::Real`: strength of the L1 regularizer if `penalty` is `:en`. Default: 0.0\n * `penalty::Union{String, Symbol}`: the penalty to use, either `:l2`, `:l1`, `:en` (elastic net) or `:none`. Default: :l2\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `IWLSCG`, if `penalty = :l2`, and `ProxGrad` otherwise.\n\n If `solver = nothing` (default) then `LBFGS()` is used, if `penalty = :l2`, and otherwise `ProxGrad(accel=true)` (FISTA) is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) 
= ProxGrad(accel=false, kwargs...)` Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(LADRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n""" -":name" = "LADRegressor" -":human_name" = "least absolute deviation regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":fitted_params", ":predict", ":LADRegressor"] -":hyperparameters" = "`(:lambda, :gamma, :penalty, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" -":hyperparameter_types" = "`(\"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJLinearModels.RidgeRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJLinearModels" +":is_wrapper" = "`false`" + +[MLJLinearModels.RidgeRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" +":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "MLJLinearModels.RidgeRegressor" -":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameters" = "`(:lambda, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" +":is_pure_julia" = "`true`" +":human_name" = "ridge regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRidgeRegressor = @load RidgeRegressor pkg=MLJLinearModels\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters.\n\nRidge regression is a linear model with 
objective function\n\n$|Xθ - y|₂²/2 + n⋅λ|θ|₂²/2$\n\nwhere $n$ is the number of observations.\n\nIf `scale_penalty_with_samples = false` then the objective function is instead\n\n$|Xθ - y|₂²/2 + λ|θ|₂²/2$.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `lambda::Real`: strength of the L2 regularization. Default: 1.0\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: any instance of `MLJLinearModels.Analytical`. Use `Analytical()` for Cholesky and `CG()=Analytical(iterative=true)` for conjugate-gradient. If `solver = nothing` (default) then `Analytical()` is used. Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(RidgeRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`ElasticNetRegressor`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJLinearModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJLinearModels" +":name" = "RidgeRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRidgeRegressor = @load RidgeRegressor pkg=MLJLinearModels\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters.\n\nRidge regression is a linear model with objective function\n\n$|Xθ - y|₂²/2 + n⋅λ|θ|₂²/2$\n\nwhere $n$ is the number of observations.\n\nIf `scale_penalty_with_samples = false` then the objective function is instead\n\n$|Xθ - y|₂²/2 + λ|θ|₂²/2$.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `lambda::Real`: strength of the L2 regularization. Default: 1.0\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. 
Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: any instance of `MLJLinearModels.Analytical`. Use `Analytical()` for Cholesky and `CG()=Analytical(iterative=true)` for conjugate-gradient. If `solver = nothing` (default) then `Analytical()` is used. Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(RidgeRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`ElasticNetRegressor`](@ref).\n""" -":name" = "RidgeRegressor" -":human_name" = "ridge regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":fitted_params", ":predict", ":RidgeRegressor"] -":hyperparameters" = "`(:lambda, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" -":hyperparameter_types" = "`(\"Real\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJLinearModels.RobustRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"MLJLinearModels.RobustRho\", \"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" +":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJLinearModels" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "MLJLinearModels.RobustRegressor" -":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameters" = "`(:rho, :lambda, :gamma, :penalty, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" +":is_pure_julia" = "`true`" +":human_name" = "robust regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nRobustRegressor\n```\n\nA model type for constructing a robust regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing 
the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRobustRegressor = @load RobustRegressor pkg=MLJLinearModels\n```\n\nDo `model = RobustRegressor()` to construct an instance with default hyper-parameters.\n\nRobust regression is a linear model with objective function\n\n$∑ρ(Xθ - y) + n⋅λ|θ|₂² + n⋅γ|θ|₁$\n\nwhere $ρ$ is a robust loss function (e.g. the Huber function) and $n$ is the number of observations.\n\nIf `scale_penalty_with_samples = false` the objective function is instead\n\n$∑ρ(Xθ - y) + λ|θ|₂² + γ|θ|₁$.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rho::MLJLinearModels.RobustRho`: the type of robust loss, which can be any instance of `MLJLinearModels.L` where `L` is one of: `AndrewsRho`, `BisquareRho`, `FairRho`, `HuberRho`, `LogisticRho`, `QuantileRho`, `TalwarRho`. Default: HuberRho(0.1)\n * `lambda::Real`: strength of the regularizer if `penalty` is `:l2` or `:l1`. Strength of the L2 regularizer if `penalty` is `:en`. Default: 1.0\n * `gamma::Real`: strength of the L1 regularizer if `penalty` is `:en`. Default: 0.0\n * `penalty::Union{String, Symbol}`: the penalty to use, either `:l2`, `:l1`, `:en` (elastic net) or `:none`. Default: :l2\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `IWLSCG`, `Newton`, `NewtonCG`, if `penalty = :l2`, and `ProxGrad` otherwise.\n\n If `solver = nothing` (default) then `LBFGS()` is used, if `penalty = :l2`, and otherwise `ProxGrad(accel=true)` (FISTA) is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) 
= ProxGrad(accel=false, kwargs...)` Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(RobustRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`HuberRegressor`](@ref), [`QuantileRegressor`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJLinearModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJLinearModels" +":name" = "RobustRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nRobustRegressor\n```\n\nA model type for constructing a robust regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRobustRegressor = @load RobustRegressor pkg=MLJLinearModels\n```\n\nDo `model = RobustRegressor()` to construct an instance with default hyper-parameters.\n\nRobust regression is a linear model with objective function\n\n$∑ρ(Xθ - y) + n⋅λ|θ|₂² + n⋅γ|θ|₁$\n\nwhere $ρ$ is a robust loss function (e.g. the Huber function) and $n$ is the number of observations.\n\nIf `scale_penalty_with_samples = false` the objective function is instead\n\n$∑ρ(Xθ - y) + λ|θ|₂² + γ|θ|₁$.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rho::MLJLinearModels.RobustRho`: the type of robust loss, which can be any instance of `MLJLinearModels.L` where `L` is one of: `AndrewsRho`, `BisquareRho`, `FairRho`, `HuberRho`, `LogisticRho`, `QuantileRho`, `TalwarRho`, `HuberRho`, `TalwarRho`. Default: HuberRho(0.1)\n * `lambda::Real`: strength of the regularizer if `penalty` is `:l2` or `:l1`. Strength of the L2 regularizer if `penalty` is `:en`. Default: 1.0\n * `gamma::Real`: strength of the L1 regularizer if `penalty` is `:en`. Default: 0.0\n * `penalty::Union{String, Symbol}`: the penalty to use, either `:l2`, `:l1`, `:en` (elastic net) or `:none`. Default: :l2\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `IWLSCG`, `Newton`, `NewtonCG`, if `penalty = :l2`, and `ProxGrad` otherwise.\n\n If `solver = nothing` (default) then `LBFGS()` is used, if `penalty = :l2`, and otherwise `ProxGrad(accel=true)` (FISTA) is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) 
= ProxGrad(accel=false, kwargs...)` Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(RobustRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`HuberRegressor`](@ref), [`QuantileRegressor`](@ref).\n""" -":name" = "RobustRegressor" -":human_name" = "robust regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":fitted_params", ":predict", ":RobustRegressor"] -":hyperparameters" = "`(:rho, :lambda, :gamma, :penalty, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" -":hyperparameter_types" = "`(\"MLJLinearModels.RobustRho\", \"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJLinearModels.ElasticNetRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"Real\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" +":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJLinearModels" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "MLJLinearModels.ElasticNetRegressor" -":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameters" = "`(:lambda, :gamma, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" +":is_pure_julia" = "`true`" +":human_name" = "elastic net regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nElasticNetRegressor\n```\n\nA model type for constructing an elastic net regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nElasticNetRegressor = @load ElasticNetRegressor pkg=MLJLinearModels\n```\n\nDo 
`model = ElasticNetRegressor()` to construct an instance with default hyper-parameters.\n\nElastic net is a linear model with objective function\n\n$|Xθ - y|₂²/2 + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁$\n\nwhere $n$ is the number of observations.\n\nIf `scale_penalty_with_samples = false` the objective function is instead\n\n$|Xθ - y|₂²/2 + λ|θ|₂²/2 + γ|θ|₁$.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `lambda::Real`: strength of the L2 regularization. Default: 1.0\n * `gamma::Real`: strength of the L1 regularization. Default: 0.0\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: any instance of `MLJLinearModels.ProxGrad`.\n\n If `solver=nothing` (default) then `ProxGrad(accel=true)` (FISTA) is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) = ProxGrad(accel=false, kwargs...)`. Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(ElasticNetRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`LassoRegressor`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJLinearModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJLinearModels" +":name" = "ElasticNetRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nElasticNetRegressor\n```\n\nA model type for constructing a elastic net regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nElasticNetRegressor = @load ElasticNetRegressor pkg=MLJLinearModels\n```\n\nDo `model = ElasticNetRegressor()` to construct an instance with default hyper-parameters.\n\nElastic net is a linear model with objective function\n\n$|Xθ - y|₂²/2 + n⋅λ|θ|₂²/2 + n⋅γ|θ|₁$\n\nwhere $n$ is the number of observations.\n\nIf `scale_penalty_with_samples = false` the objective function is instead\n\n$|Xθ - y|₂²/2 + λ|θ|₂²/2 + γ|θ|₁$.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `lambda::Real`: strength of the L2 regularization. 
Default: 1.0\n * `gamma::Real`: strength of the L1 regularization. Default: 0.0\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: any instance of `MLJLinearModels.ProxGrad`.\n\n If `solver=nothing` (default) then `ProxGrad(accel=true)` (FISTA) is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) = ProxGrad(accel=false, kwargs...)`. Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(ElasticNetRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`LassoRegressor`](@ref).\n""" -":name" = "ElasticNetRegressor" -":human_name" = "elastic net regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":fitted_params", ":predict", ":ElasticNetRegressor"] -":hyperparameters" = "`(:lambda, :gamma, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" -":hyperparameter_types" = "`(\"Real\", \"Real\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJLinearModels.LinearRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" +":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJLinearModels" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "MLJLinearModels.LinearRegressor" -":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameters" = "`(:fit_intercept, :solver)`" +":is_pure_julia" = "`true`" +":human_name" = "linear regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" 
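The reordered keys in these hunks (`:hyperparameters`, `:target_scitype`, `:is_pure_julia`, and so on) are exactly the fields that MLJ's model-query API serves once the registry is tagged. As a minimal sketch of how a downstream user consumes an entry such as `MLJLinearModels.LinearRegressor` from this file (assuming only the exported MLJ functions `models`, `matching` and `info`; not part of the patch itself):

```
using MLJ

# list all registered models able to handle this supervised task
X, y = make_regression()
models(matching(X, y))

# fetch the registry metadata for one entry; the returned fields
# mirror the TOML keys in this file
meta = info("LinearRegressor", pkg="MLJLinearModels")
meta.hyperparameters   # (:fit_intercept, :solver)
meta.is_pure_julia     # true
```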
+":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters.\n\nThis model provides standard linear regression with objective function\n\n$|Xθ - y|₂²/2$\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: \"any instance of `MLJLinearModels.Analytical`. Use `Analytical()` for Cholesky and `CG()=Analytical(iterative=true)` for conjugate-gradient.\n\n If `solver = nothing` (default) then `Analytical()` is used. Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(LinearRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJLinearModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJLinearModels" +":name" = "LinearRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters.\n\nThis model provides standard linear regression with objective function\n\n$|Xθ - y|₂²/2$\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: \"any instance of `MLJLinearModels.Analytical`. Use `Analytical()` for Cholesky and `CG()=Analytical(iterative=true)` for conjugate-gradient.\n\n If `solver = nothing` (default) then `Analytical()` is used. 
Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(LinearRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n""" -":name" = "LinearRegressor" -":human_name" = "linear regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":fitted_params", ":predict", ":LinearRegressor"] -":hyperparameters" = "`(:fit_intercept, :solver)`" -":hyperparameter_types" = "`(\"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJLinearModels.LassoRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" +":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJLinearModels" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "MLJLinearModels.LassoRegressor" -":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameters" = "`(:lambda, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" +":is_pure_julia" = "`true`" +":human_name" = "lasso regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLassoRegressor\n```\n\nA model type for constructing a lasso regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLassoRegressor = @load LassoRegressor pkg=MLJLinearModels\n```\n\nDo `model = LassoRegressor()` to construct an instance with default hyper-parameters.\n\nLasso regression is a linear model with objective function\n\n$|Xθ - y|₂²/2 + n⋅λ|θ|₁$\n\nwhere $n$ is the number of observations.\n\nIf `scale_penalty_with_samples = false` the objective function is\n\n$|Xθ - y|₂²/2 + λ|θ|₁$.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. 
\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `lambda::Real`: strength of the L1 regularization. Default: 1.0\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: any instance of `MLJLinearModels.ProxGrad`. If `solver=nothing` (default) then `ProxGrad(accel=true)` (FISTA) is used. Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) = ProxGrad(accel=false, kwargs...)`. Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(LassoRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`ElasticNetRegressor`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJLinearModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJLinearModels" +":name" = "LassoRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLassoRegressor\n```\n\nA model type for constructing a lasso regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLassoRegressor = @load LassoRegressor pkg=MLJLinearModels\n```\n\nDo `model = LassoRegressor()` to construct an instance with default hyper-parameters.\n\nLasso regression is a linear model with objective function\n\n$|Xθ - y|₂²/2 + n⋅λ|θ|₁$\n\nwhere $n$ is the number of observations.\n\nIf `scale_penalty_with_samples = false` the objective function is\n\n$|Xθ - y|₂²/2 + λ|θ|₁$.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `lambda::Real`: strength of the L1 regularization. Default: 1.0\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: any instance of `MLJLinearModels.ProxGrad`. If `solver=nothing` (default) then `ProxGrad(accel=true)` (FISTA) is used. Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) 
= ProxGrad(accel=false, kwargs...)`. Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(LassoRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`ElasticNetRegressor`](@ref).\n""" -":name" = "LassoRegressor" -":human_name" = "lasso regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":fitted_params", ":predict", ":LassoRegressor"] -":hyperparameters" = "`(:lambda, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" -":hyperparameter_types" = "`(\"Real\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJLinearModels.HuberRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" +":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJLinearModels" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "MLJLinearModels.HuberRegressor" -":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" +":hyperparameters" = "`(:delta, :lambda, :gamma, :penalty, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" +":is_pure_julia" = "`true`" +":human_name" = "huber regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nHuberRegressor\n```\n\nA model type for constructing a huber regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nHuberRegressor = @load HuberRegressor pkg=MLJLinearModels\n```\n\nDo `model = HuberRegressor()` to construct an instance with default hyper-parameters.\n\nThis model coincides with 
[`RobustRegressor`](@ref), with the exception that the robust loss, `rho`, is fixed to `HuberRho(delta)`, where `delta` is a new hyperparameter.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `delta::Real`: parameterizes the `HuberRho` function (radius of the ball within which the loss is a quadratic loss) Default: 0.5\n * `lambda::Real`: strength of the regularizer if `penalty` is `:l2` or `:l1`. Strength of the L2 regularizer if `penalty` is `:en`. Default: 1.0\n * `gamma::Real`: strength of the L1 regularizer if `penalty` is `:en`. Default: 0.0\n * `penalty::Union{String, Symbol}`: the penalty to use, either `:l2`, `:l1`, `:en` (elastic net) or `:none`. Default: :l2\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `IWLSCG`, `Newton`, `NewtonCG`, if `penalty = :l2`, and `ProxGrad` otherwise.\n\n If `solver = nothing` (default) then `LBFGS()` is used, if `penalty = :l2`, and otherwise `ProxGrad(accel=true)` (FISTA) is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) = ProxGrad(accel=false, kwargs...)` Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(HuberRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`RobustRegressor`](@ref), [`QuantileRegressor`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJLinearModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJLinearModels" +":name" = "HuberRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nHuberRegressor\n```\n\nA model type for constructing a huber regressor, based on [MLJLinearModels.jl](https://github.com/alan-turing-institute/MLJLinearModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nHuberRegressor = @load HuberRegressor pkg=MLJLinearModels\n```\n\nDo `model = HuberRegressor()` to construct an instance with default hyper-parameters.\n\nThis model coincides with [`RobustRegressor`](@ref), with the exception that the robust loss, `rho`, is fixed to `HuberRho(delta)`, where `delta` is a new hyperparameter.\n\nDifferent solver options exist, as indicated under \"Hyperparameters\" below. 
\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have `Continuous` scitype; check column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `delta::Real`: parameterizes the `HuberRho` function (radius of the ball within which the loss is a quadratic loss) Default: 0.5\n * `lambda::Real`: strength of the regularizer if `penalty` is `:l2` or `:l1`. Strength of the L2 regularizer if `penalty` is `:en`. Default: 1.0\n * `gamma::Real`: strength of the L1 regularizer if `penalty` is `:en`. Default: 0.0\n * `penalty::Union{String, Symbol}`: the penalty to use, either `:l2`, `:l1`, `:en` (elastic net) or `:none`. Default: :l2\n * `fit_intercept::Bool`: whether to fit the intercept or not. Default: true\n * `penalize_intercept::Bool`: whether to penalize the intercept. Default: false\n * `scale_penalty_with_samples::Bool`: whether to scale the penalty with the number of observations. Default: true\n * `solver::Union{Nothing, MLJLinearModels.Solver}`: some instance of `MLJLinearModels.S` where `S` is one of: `LBFGS`, `IWLSCG`, `Newton`, `NewtonCG`, if `penalty = :l2`, and `ProxGrad` otherwise.\n\n If `solver = nothing` (default) then `LBFGS()` is used, if `penalty = :l2`, and otherwise `ProxGrad(accel=true)` (FISTA) is used.\n\n Solver aliases: `FISTA(; kwargs...) = ProxGrad(accel=true, kwargs...)`, `ISTA(; kwargs...) = ProxGrad(accel=false, kwargs...)` Default: nothing\n\n## Example\n\n```\nusing MLJ\nX, y = make_regression()\nmach = fit!(machine(HuberRegressor(), X, y))\npredict(mach, X)\nfitted_params(mach)\n```\n\nSee also [`RobustRegressor`](@ref), [`QuantileRegressor`](@ref).\n""" -":name" = "HuberRegressor" -":human_name" = "huber regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":fitted_params", ":predict", ":HuberRegressor"] -":hyperparameters" = "`(:delta, :lambda, :gamma, :penalty, :fit_intercept, :penalize_intercept, :scale_penalty_with_samples, :solver)`" -":hyperparameter_types" = "`(\"Real\", \"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [Maxnet.MaxnetBinaryClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" +":constructor" = 
"`nothing`" +":hyperparameter_types" = "`(\"Union{String, Vector{<:Maxnet.AbstractFeatureClass}}\", \"Float64\", \"Any\", \"Bool\", \"Integer\", \"Float64\", \"GLM.Link\", \"Bool\", \"Any\")`" +":package_uuid" = "81f79f80-22f2-4e41-ab86-00c11cf0f26f" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "Maxnet" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "Maxnet.MaxnetBinaryClassifier" -":package_uuid" = "81f79f80-22f2-4e41-ab86-00c11cf0f26f" +":hyperparameters" = "`(:features, :regularization_multiplier, :regularization_function, :addsamplestobackground, :n_knots, :weight_factor, :link, :clamp, :kw)`" +":is_pure_julia" = "`false`" +":human_name" = "Maxnet" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMaxnetBinaryClassifier\n```\n\nA model type for constructing a Maxnet, based on [Maxnet.jl](https://github.com/tiemvanderdeure/Maxnet.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMaxnetBinaryClassifier = @load MaxnetBinaryClassifier pkg=Maxnet\n```\n\nDo `model = MaxnetBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MaxnetBinaryClassifier(features=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous` or `<:Multiclass`. Check `scitypes` with `schema(X)`.\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Binary`. The first class should refer to background values, and the second class to presence values.\n\n# Hyper-parameters\n\n * `features`: Specifies which features classes to use in the model, e.g. \"lqh\" for linear, quadratic and hinge features. See also [Maxnet.maxnet](@ref)\n * `regularization_multiplier = 1.0`: 'Adjust how tight the model will fit. Increasing this will reduce overfitting.\n * `regularization_function`: A function to compute the regularization of each feature class. Defaults to `Maxnet.default_regularization`\n * `addsamplestobackground = true`: Controls wether to add presence values to the background.\n * `n_knots = 50`: The number of knots used for Threshold and Hinge features. A higher number gives more flexibility for these features.\n * `weight_factor = 100.0`: A `Float64` value to adjust the weight of the background samples.\n * `link = Maxnet.CloglogLink()`: The link function to use when predicting. 
See `Maxnet.predict`\n * `clamp = false`: Clamp values passed to `MLJBase.predict` to the range the model was trained on.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic and can be interpreted as the probability of presence.\n\n# Fitted Parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `Tuple` where the first entry is the `Maxnet.MaxnetModel` returned by the Maxnet algorithm and the second the entry is the classes of `y`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `selected_variables`: A `Vector` of `Symbols` of the variables that were selected.\n * `selected_features`: A `Vector` of `Maxnet.ModelMatrixColumn` with the features that were selected.\n * `complexity`: the number of selected features in the model.\n\n# Example\n\n```@example\nusing MLJBase, Maxnet\np_a, env = Maxnet.bradypus()\ny = coerce(p_a, Binary)\nX = coerce(env, Count => Continuous)\n\nmach = machine(MaxnetBinaryClassifier(features = \"lqp\"), X, y)\nfit!(mach)\nyhat = MLJBase.predict(mach, env)\n\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/tiemvanderdeure/Maxnet.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "Maxnet" +":name" = "MaxnetBinaryClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMaxnetBinaryClassifier\n```\n\nA model type for constructing a Maxnet, based on [Maxnet.jl](https://github.com/tiemvanderdeure/Maxnet.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMaxnetBinaryClassifier = @load MaxnetBinaryClassifier pkg=Maxnet\n```\n\nDo `model = MaxnetBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MaxnetBinaryClassifier(features=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous` or `<:Multiclass`. Check `scitypes` with `schema(X)`.\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Binary`. The first class should refer to background values, and the second class to presence values.\n\n# Hyper-parameters\n\n * `features`: Specifies which features classes to use in the model, e.g. \"lqh\" for linear, quadratic and hinge features. See also [Maxnet.maxnet](@ref)\n * `regularization_multiplier = 1.0`: 'Adjust how tight the model will fit. Increasing this will reduce overfitting.\n * `regularization_function`: A function to compute the regularization of each feature class. Defaults to `Maxnet.default_regularization`\n * `addsamplestobackground = true`: Controls wether to add presence values to the background.\n * `n_knots = 50`: The number of knots used for Threshold and Hinge features. A higher number gives more flexibility for these features.\n * `weight_factor = 100.0`: A `Float64` value to adjust the weight of the background samples.\n * `link = Maxnet.CloglogLink()`: The link function to use when predicting. 
See `Maxnet.predict`\n * `clamp = false`: Clamp values passed to `MLJBase.predict` to the range the model was trained on.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic and can be interpreted as the probability of presence.\n\n# Fitted Parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `Tuple` where the first entry is the `Maxnet.MaxnetModel` returned by the Maxnet algorithm and the second the entry is the classes of `y`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `selected_variables`: A `Vector` of `Symbols` of the variables that were selected.\n * `selected_features`: A `Vector` of `Maxnet.ModelMatrixColumn` with the features that were selected.\n * `complexity`: the number of selected features in the model.\n\n# Example\n\n```@example\nusing MLJBase, Maxnet\np_a, env = Maxnet.bradypus()\ny = coerce(p_a, Binary)\nX = coerce(env, Count => Continuous)\n\nmach = machine(MaxnetBinaryClassifier(features = \"lqp\"), X, y)\nfit!(mach)\nyhat = MLJBase.predict(mach, env)\n\n```\n""" -":name" = "MaxnetBinaryClassifier" -":human_name" = "Maxnet" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`(:features, :regularization_multiplier, :regularization_function, :addsamplestobackground, :n_knots, :weight_factor, :link, :clamp, :kw)`" -":hyperparameter_types" = "`(\"Union{String, Vector{<:Maxnet.AbstractFeatureClass}}\", \"Float64\", \"Any\", \"Bool\", \"Integer\", \"Float64\", \"GLM.Link\", \"Bool\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [ParallelKMeans.KMeans] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Union{Symbol, ParallelKMeans.AbstractKMeansAlg}\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Union{Int64, Random.AbstractRNG}\", \"Any\", \"Any\")`" +":package_uuid" = "42b8e9d4-006b-409a-8472-7f34b3fb58af" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "ParallelKMeans" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "ParallelKMeans.KMeans" -":package_uuid" = "42b8e9d4-006b-409a-8472-7f34b3fb58af" +":hyperparameters" = "`(:algo, :k_init, :k, :tol, :max_iters, :copy, :threads, :rng, :weights, :init)`" +":is_pure_julia" = "`true`" +":human_name" = "k means" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """Parallel & lightning fast implementation of all available variants of the KMeans clustering algorithm\n in native Julia. Compatible with Julia 1.3+""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":package_url" = "https://github.com/PyDataBlog/ParallelKMeans.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "ParallelKMeans" +":name" = "KMeans" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """Parallel & lightning fast implementation of all available variants of the KMeans clustering algorithm\n in native Julia. Compatible with Julia 1.3+""" -":name" = "KMeans" -":human_name" = "k means" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:algo, :k_init, :k, :tol, :max_iters, :copy, :threads, :rng, :weights, :init)`" -":hyperparameter_types" = "`(\"Union{Symbol, ParallelKMeans.AbstractKMeansAlg}\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Union{Int64, Random.AbstractRNG}\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[NaiveBayes.GaussianNBClassifier] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +[MLJNaiveBayesInterface.GaussianNBClassifier] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`()`" +":package_uuid" = "9bbee03b-0db5-5f46-924f-b5c9c21b8c60" +":hyperparameter_ranges" = "`()`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" 
-":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "NaiveBayes" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "unknown" +":prediction_type" = ":probabilistic" ":load_path" = "MLJNaiveBayesInterface.GaussianNBClassifier" -":package_uuid" = "9bbee03b-0db5-5f46-924f-b5c9c21b8c60" +":hyperparameters" = "`()`" +":is_pure_julia" = "`true`" +":human_name" = "Gaussian naive Bayes classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nGaussianNBClassifier\n```\n\nA model type for constructing a Gaussian naive Bayes classifier, based on [NaiveBayes.jl](https://github.com/dfdx/NaiveBayes.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nGaussianNBClassifier = @load GaussianNBClassifier pkg=NaiveBayes\n```\n\nDo `model = GaussianNBClassifier()` to construct an instance with default hyper-parameters. \n\nGiven each class taken on by the target variable `y`, it is supposed that the conditional probability distribution for the input variables `X` is a multivariate Gaussian. The mean and covariance of these Gaussian distributions are estimated using maximum likelihood, and a probability distribution for `y` given `X` is deduced by applying Bayes' rule. The required marginal for `y` is estimated using class frequency in the training data.\n\n**Important.** The name \"naive Bayes classifier\" is perhaps misleading. Since we are learning the full multivariate Gaussian distributions for `X` given `y`, we are not applying the usual naive Bayes independence condition, which would amount to forcing the covariance matrix to be diagonal.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the mode of above predictions.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `c_counts`: A dictionary containing the observed count of each input class.\n * `c_stats`: A dictionary containing observed statistics on each input class. Each class is represented by a `DataStats` object, with the following fields:\n\n * `n_vars`: The number of variables used to describe the class's behavior.\n * `n_obs`: The number of times the class is observed.\n * `obs_axis`: The axis along which the observations were computed.\n * `gaussians`: A per class dictionary of Gaussians, each representing the distribution of the class. 
Represented with type `Distributions.MvNormal` from the Distributions.jl package.\n * `n_obs`: The total number of observations in the training data.\n\n# Examples\n\n```\nusing MLJ\nGaussianNB = @load GaussianNBClassifier pkg=NaiveBayes\n\nX, y = @load_iris\nclf = GaussianNB()\nmach = machine(clf, X, y) |> fit!\n\nfitted_params(mach)\n\npreds = predict(mach, X) # probabilistic predictions\npreds[1]\npredict_mode(mach, X) # point predictions\n```\n\nSee also [`MultinomialNBClassifier`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/dfdx/NaiveBayes.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "NaiveBayes" +":name" = "GaussianNBClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nGaussianNBClassifier\n```\n\nA model type for constructing a Gaussian naive Bayes classifier, based on [NaiveBayes.jl](https://github.com/dfdx/NaiveBayes.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nGaussianNBClassifier = @load GaussianNBClassifier pkg=NaiveBayes\n```\n\nDo `model = GaussianNBClassifier()` to construct an instance with default hyper-parameters. \n\nGiven each class taken on by the target variable `y`, it is supposed that the conditional probability distribution for the input variables `X` is a multivariate Gaussian. The mean and covariance of these Gaussian distributions are estimated using maximum likelihood, and a probability distribution for `y` given `X` is deduced by applying Bayes' rule. The required marginal for `y` is estimated using class frequency in the training data.\n\n**Important.** The name \"naive Bayes classifier\" is perhaps misleading. Since we are learning the full multivariate Gaussian distributions for `X` given `y`, we are not applying the usual naive Bayes independence condition, which would amount to forcing the covariance matrix to be diagonal.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the mode of above predictions.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `c_counts`: A dictionary containing the observed count of each input class.\n * `c_stats`: A dictionary containing observed statistics on each input class. Each class is represented by a `DataStats` object, with the following fields:\n\n * `n_vars`: The number of variables used to describe the class's behavior.\n * `n_obs`: The number of times the class is observed.\n * `obs_axis`: The axis along which the observations were computed.\n * `gaussians`: A per class dictionary of Gaussians, each representing the distribution of the class. 
Represented with type `Distributions.MvNormal` from the Distributions.jl package.\n * `n_obs`: The total number of observations in the training data.\n\n# Examples\n\n```\nusing MLJ\nGaussianNB = @load GaussianNBClassifier pkg=NaiveBayes\n\nX, y = @load_iris\nclf = GaussianNB()\nmach = machine(clf, X, y) |> fit!\n\nfitted_params(mach)\n\npreds = predict(mach, X) # probabilistic predictions\npreds[1]\npredict_mode(mach, X) # point predictions\n```\n\nSee also [`MultinomialNBClassifier`](@ref)\n""" -":name" = "GaussianNBClassifier" -":human_name" = "Gaussian naive Bayes classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`()`" -":hyperparameter_types" = "`()`" -":hyperparameter_ranges" = "`()`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[NaiveBayes.MultinomialNBClassifier] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractMatrix{<:ScientificTypesBase.Count}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +[MLJNaiveBayesInterface.MultinomialNBClassifier] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\",)`" +":package_uuid" = "9bbee03b-0db5-5f46-924f-b5c9c21b8c60" +":hyperparameter_ranges" = "`(nothing,)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractMatrix{<:ScientificTypesBase.Count}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "NaiveBayes" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "unknown" +":prediction_type" = ":probabilistic" ":load_path" = "MLJNaiveBayesInterface.MultinomialNBClassifier" -":package_uuid" = "9bbee03b-0db5-5f46-924f-b5c9c21b8c60" +":hyperparameters" = "`(:alpha,)`" +":is_pure_julia" = "`true`" +":human_name" = "multinomial naive Bayes classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultinomialNBClassifier\n```\n\nA model type for constructing a multinomial naive Bayes classifier, based on [NaiveBayes.jl](https://github.com/dfdx/NaiveBayes.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultinomialNBClassifier = @load MultinomialNBClassifier pkg=NaiveBayes\n```\n\nDo `model = MultinomialNBClassifier()` to construct an instance 
with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultinomialNBClassifier(alpha=...)`.\n\nThe [multinomial naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Multinomial_naive_Bayes) is often applied when input features consist of counts (scitype `Count`) and when observations for a fixed target class are generated from a multinomial distribution with fixed probability vector, but whose sample length varies from observation to observation. For example, features might represent word counts in text documents being classified by sentiment.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Count`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `alpha=1`: Lidstone smoothing in estimation of multinomial probability vectors from training histograms (default corresponds to Laplacian smoothing).\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n * `predict_mode(mach, Xnew)`: Return the mode of above predictions.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `c_counts`: A dictionary containing the observed count of each input class.\n * `x_counts`: A dictionary containing the categorical counts of each input class.\n * `x_totals`: The sum of each count (input feature), ungrouped.\n * `n_obs`: The total number of observations in the training data.\n\n# Examples\n\n```\nusing MLJ\nimport TextAnalysis\n\nCountTransformer = @load CountTransformer pkg=MLJText\nMultinomialNBClassifier = @load MultinomialNBClassifier pkg=NaiveBayes\n\ntokenized_docs = TextAnalysis.tokenize.([\n \"I am very mad. You never listen.\",\n \"You seem to be having trouble? Can I help you?\",\n \"Our boss is mad at me. I hope he dies.\",\n \"His boss wants to help me. She is nice.\",\n \"Thank you for your help. It is nice working with you.\",\n \"Never do that again! I am so mad. 
\",\n])\n\nsentiment = [\n \"negative\",\n \"positive\",\n \"negative\",\n \"positive\",\n \"positive\",\n \"negative\",\n]\n\nmach1 = machine(CountTransformer(), tokenized_docs) |> fit!\n\n# matrix of counts:\nX = transform(mach1, tokenized_docs)\n\n# to ensure scitype(y) <: AbstractVector{<:OrderedFactor}:\ny = coerce(sentiment, OrderedFactor)\n\nclassifier = MultinomialNBClassifier()\nmach2 = machine(classifier, X, y)\nfit!(mach2, rows=1:4)\n\n# probabilistic predictions:\ny_prob = predict(mach2, rows=5:6) # distributions\npdf.(y_prob, \"positive\") # probabilities for \"positive\"\nlog_loss(y_prob, y[5:6])\n\n# point predictions:\nyhat = mode.(y_prob) # or `predict_mode(mach2, rows=5:6)`\n```\n\nSee also [`GaussianNBClassifier`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/dfdx/NaiveBayes.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "NaiveBayes" +":name" = "MultinomialNBClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMultinomialNBClassifier\n```\n\nA model type for constructing a multinomial naive Bayes classifier, based on [NaiveBayes.jl](https://github.com/dfdx/NaiveBayes.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultinomialNBClassifier = @load MultinomialNBClassifier pkg=NaiveBayes\n```\n\nDo `model = MultinomialNBClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultinomialNBClassifier(alpha=...)`.\n\nThe [multinomial naive Bayes classifier](https://en.wikipedia.org/wiki/Naive_Bayes_classifier#Multinomial_naive_Bayes) is often applied when input features consist of a counts (scitype `Count`) and when observations for a fixed target class are generated from a multinomial distribution with fixed probability vector, but whose sample length varies from observation to observation. 
For example, features might represent word counts in text documents being classified by sentiment.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Count`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `alpha=1`: Lindstone smoothing in estimation of multinomial probability vectors from training histograms (default corresponds to Laplacian smoothing).\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n * `predict_mode(mach, Xnew)`: Return the mode of above predictions.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `c_counts`: A dictionary containing the observed count of each input class.\n * `x_counts`: A dictionary containing the categorical counts of each input class.\n * `x_totals`: The sum of each count (input feature), ungrouped.\n * `n_obs`: The total number of observations in the training data.\n\n# Examples\n\n```\nusing MLJ\nimport TextAnalysis\n\nCountTransformer = @load CountTransformer pkg=MLJText\nMultinomialNBClassifier = @load MultinomialNBClassifier pkg=NaiveBayes\n\ntokenized_docs = TextAnalysis.tokenize.([\n \"I am very mad. You never listen.\",\n \"You seem to be having trouble? Can I help you?\",\n \"Our boss is mad at me. I hope he dies.\",\n \"His boss wants to help me. She is nice.\",\n \"Thank you for your help. It is nice working with you.\",\n \"Never do that again! I am so mad. 
\",\n])\n\nsentiment = [\n \"negative\",\n \"positive\",\n \"negative\",\n \"positive\",\n \"positive\",\n \"negative\",\n]\n\nmach1 = machine(CountTransformer(), tokenized_docs) |> fit!\n\n# matrix of counts:\nX = transform(mach1, tokenized_docs)\n\n# to ensure scitype(y) <: AbstractVector{<:OrderedFactor}:\ny = coerce(sentiment, OrderedFactor)\n\nclassifier = MultinomialNBClassifier()\nmach2 = machine(classifier, X, y)\nfit!(mach2, rows=1:4)\n\n# probabilistic predictions:\ny_prob = predict(mach2, rows=5:6) # distributions\npdf.(y_prob, \"positive\") # probabilities for \"positive\"\nlog_loss(y_prob, y[5:6])\n\n# point predictions:\nyhat = mode.(y_prob) # or `predict_mode(mach2, rows=5:6)`\n```\n\nSee also [`GaussianNBClassifier`](@ref)\n""" -":name" = "MultinomialNBClassifier" -":human_name" = "multinomial naive Bayes classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:alpha,)`" -":hyperparameter_types" = "`(\"Int64\",)`" -":hyperparameter_ranges" = "`(nothing,)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractMatrix{<:ScientificTypesBase.Count}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJBase.Pipeline] -":input_scitype" = "`ScientificTypesBase.Unknown`" +[MLJDecisionTreeInterface.AdaBoostStumpClassifier] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" +":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" -":package_license" = "unknown" -":load_path" = "MLJBase.Pipeline" -":package_uuid" = "unknown" -":package_url" = "unknown" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJDecisionTreeInterface.AdaBoostStumpClassifier" +":hyperparameters" = "`(:n_iter, :feature_importance, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "Ada-boosted stump classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nAdaBoostStumpClassifier\n```\n\nA model type for constructing a 
Ada-boosted stump classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nAdaBoostStumpClassifier = @load AdaBoostStumpClassifier pkg=DecisionTree\n```\n\nDo `model = AdaBoostStumpClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `AdaBoostStumpClassifier(n_iter=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `n_iter=10`: number of iterations of AdaBoost\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted Parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `stumps`: the `Ensemble` object returned by the core DecisionTree.jl algorithm.\n * `coefficients`: the stump coefficients (one per stump)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features`: the names of the features encountered in training\n\n# Accessor functions\n\n * `feature_importances(mach)` returns a vector of `(feature::Symbol => importance)` pairs; the type of importance is determined by the hyperparameter `feature_importance` (see above)\n\n# Examples\n\n```\nusing MLJ\nBooster = @load AdaBoostStumpClassifier pkg=DecisionTree\nbooster = Booster(n_iter=15)\n\nX, y = @load_iris\nmach = machine(booster, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"virginica\" class\n\nfitted_params(mach).stumps # raw `Ensemble` object from DecisionTree.jl\nfitted_params(mach).coefficients # coefficient associated with each stump\nfeature_importances(mach)\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.AdaBoostStumpClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/bensadeghi/DecisionTree.jl" +":package_name" = "DecisionTree" +":name" = "AdaBoostStumpClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... 
|> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object. Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```julia\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Transformers that need a target in training\n\nSome transformers that have type `Unsupervised` (so that the output of `transform` is propagated in pipelines) may require a target variable for training. An example are so-called target encoders (which transform categorical input features, based on some target observations). Provided they appear before any `Supervised` component in the pipelines, such models are supported. Of course a target must be provided whenever training such a pipeline, whether or not it contains a `Supervised` component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! 
warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n""" -":name" = "Pipeline" -":human_name" = "static pipeline" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" -":implemented_methods" = [] -":hyperparameters" = "`(:named_components, :cache)`" -":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows"] ":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`Pipeline`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" -[MLJBase.Resampler] -":input_scitype" = "`ScientificTypesBase.Unknown`" +[MLJDecisionTreeInterface.DecisionTreeRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" +":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractVector{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`ScientificTypesBase.Unknown`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJDecisionTreeInterface.DecisionTreeRegressor" +":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :post_prune, :merge_purity_threshold, :feature_importance, :rng)`" ":is_pure_julia" = "`true`" -":package_name" = "MLJBase" -":package_license" = "unknown" -":load_path" = "MLJBase.Resampler" -":package_uuid" = "unknown" -":package_url" = "unknown" -":is_wrapper" = "`true`" -":supports_weights" = "`missing`" -":supports_class_weights" = "`missing`" -":supports_online" = "`false`" -":docstring" = """```\nresampler = Resampler(\n model=ConstantRegressor(),\n resampling=CV(),\n measure=nothing,\n weights=nothing,\n class_weights=nothing\n 
operation=predict,\n repeats = 1,\n acceleration=default_resource(),\n check_measure=true,\n per_observation=true,\n logger=default_logger(),\n compact=false,\n)\n```\n\n*Private method.* Use at own risk.\n\nResampling model wrapper, used internally by the `fit` method of `TunedModel` instances and `IteratedModel` instances. See [`evaluate!`](@ref) for meaning of the options. Not intended for use by general user, who will ordinarily use [`evaluate!`](@ref) directly.\n\nGiven a machine `mach = machine(resampler, args...)` one obtains a performance evaluation of the specified `model`, performed according to the prescribed `resampling` strategy and other parameters, using data `args...`, by calling `fit!(mach)` followed by `evaluate(mach)`.\n\nOn subsequent calls to `fit!(mach)` new train/test pairs of row indices are only regenerated if `resampling`, `repeats` or `cache` fields of `resampler` have changed. The evolution of an RNG field of `resampler` does *not* constitute a change (`==` for `MLJType` objects is not sensitive to such changes; see [`is_same_except`](@ref)).\n\nIf there is single train/test pair, then warm-restart behavior of the wrapped model `resampler.model` will extend to warm-restart behaviour of the wrapper `resampler`, with respect to mutations of the wrapped model.\n\nThe sample `weights` are passed to the specified performance measures that support weights for evaluation. These weights are not to be confused with any weights bound to a `Resampler` instance in a machine, used for training the wrapped `model` when supported.\n\nThe sample `class_weights` are passed to the specified performance measures that support per-class weights for evaluation. These weights are not to be confused with any weights bound to a `Resampler` instance in a machine, used for training the wrapped `model` when supported.\n""" -":name" = "Resampler" -":human_name" = "resampler" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`Any`" -":implemented_methods" = [":clean!", ":evaluate", ":fit", ":fitted_params", ":update"] -":hyperparameters" = "`(:model, :resampling, :measure, :weights, :class_weights, :operation, :acceleration, :check_measure, :repeats, :cache, :per_observation, :logger, :compact)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict{<:Any, <:Real}}\", \"Any\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Int64\", \"Bool\", \"Bool\", \"Any\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":human_name" = "CART decision tree regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`MLJBase.Resampler`" - -[MLJBase.Stack] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":docstring" = """```\nDecisionTreeRegressor\n```\n\nA model type for constructing a CART decision tree regressor, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), 
and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDecisionTreeRegressor = @load DecisionTreeRegressor pkg=DecisionTree\n```\n\nDo `model = DecisionTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DecisionTreeRegressor(max_depth=...)`.\n\n`DecisionTreeRegressor` implements the [CART algorithm](https://en.wikipedia.org/wiki/Decision_tree_learning), originally published in Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. (1984): \"Classification and regression trees\". *Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software*.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: min number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=0`: number of features to select at random (0 for all)\n * `post_prune=false`: set to `true` for post-fit pruning\n * `merge_purity_threshold=1.0`: (post-pruning) merge leaves having combined purity `>= merge_purity_threshold`\n * `feature_importance`: method to use for computing feature importances. 
One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree or stump object returned by the core DecisionTree.jl algorithm\n * `features`: the names of the features encountered in training\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features`: the names of the features encountered in training\n\n# Accessor functions\n\n * `feature_importances(mach)` returns a vector of `(feature::Symbol => importance)` pairs; the type of importance is determined by the hyperparameter `feature_importance` (see above)\n\n# Examples\n\n```\nusing MLJ\nDecisionTreeRegressor = @load DecisionTreeRegressor pkg=DecisionTree\nmodel = DecisionTreeRegressor(max_depth=3, min_samples_split=3)\n\nX, y = make_regression(100, 4; rng=123) # synthetic data\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2; rng=123)\nyhat = predict(mach, Xnew) # new predictions\n\njulia> fitted_params(mach).tree\nx1 < 0.2758\n├─ x2 < 0.9137\n│ ├─ x1 < -0.9582\n│ │ ├─ 0.9189256882087312 (0/12)\n│ │ └─ -0.23180616021065256 (0/38)\n│ └─ -1.6461153800037722 (0/9)\n└─ x1 < 1.062\n ├─ x2 < -0.4969\n │ ├─ -0.9330755147107384 (0/5)\n │ └─ -2.3287967825015548 (0/17)\n └─ x2 < 0.4598\n ├─ -2.931299926506291 (0/11)\n └─ -4.726518740473489 (0/8)\n\nfeature_importances(mach) # get feature importances\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.DecisionTreeRegressor`](@ref).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/bensadeghi/DecisionTree.jl" +":package_name" = "DecisionTree" +":name" = "DecisionTreeRegressor" ":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" -":package_license" = "MIT" -":load_path" = "MLJBase.Stack" -":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -":package_url" = "https://github.com/JuliaAI/MLJBase.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nUnion{Types...}\n```\n\nA type union is an abstract type which includes all instances of any of its argument types. 
The empty union [`Union{}`](@ref) is the bottom type of Julia.\n\n# Examples\n\n```jldoctest\njulia> IntOrString = Union{Int,AbstractString}\nUnion{Int64, AbstractString}\n\njulia> 1 isa IntOrString\ntrue\n\njulia> \"Hello!\" isa IntOrString\ntrue\n\njulia> 1.0 isa IntOrString\nfalse\n```\n""" -":name" = "Stack" -":human_name" = "probabilistic stack" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [] -":hyperparameters" = "`(:models, :metalearner, :resampling, :measures, :cache, :acceleration)`" -":hyperparameter_types" = "`(\"Vector{MLJModelInterface.Supervised}\", \"MLJModelInterface.Probabilistic\", \"Any\", \"Union{Nothing, AbstractVector}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows"] ":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`MLJBase.Stack`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" -[MLJBase.TransformedTargetModel] -":input_scitype" = "`ScientificTypesBase.Unknown`" +[MLJDecisionTreeInterface.DecisionTreeClassifier] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Float64\", \"Int64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" +":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJDecisionTreeInterface.DecisionTreeClassifier" +":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :post_prune, :merge_purity_threshold, :display_depth, :feature_importance, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "CART decision tree classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" 
+":docstring" = """```\nDecisionTreeClassifier\n```\n\nA model type for constructing a CART decision tree classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDecisionTreeClassifier = @load DecisionTreeClassifier pkg=DecisionTree\n```\n\nDo `model = DecisionTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DecisionTreeClassifier(max_depth=...)`.\n\n`DecisionTreeClassifier` implements the [CART algorithm](https://en.wikipedia.org/wiki/Decision_tree_learning), originally published in Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. (1984): \"Classification and regression trees\". *Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software.*.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: max number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=0`: number of features to select at random (0 for all)\n * `post_prune=false`: set to `true` for post-fit pruning\n * `merge_purity_threshold=1.0`: (post-pruning) merge leaves having combined purity `>= merge_purity_threshold`\n * `display_depth=5`: max depth to show when displaying the tree\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `raw_tree`: the raw `Node`, `Leaf` or `Root` object returned by the core DecisionTree.jl algorithm\n * `tree`: a visualizable, wrapped version of `raw_tree` implementing the AbstractTrees.jl interface; see \"Examples\" below\n * `encoding`: dictionary of target classes keyed on integers used internally by DecisionTree.jl\n * `features`: the names of the features encountered in training, in an order consistent with the output of `print_tree` (see below)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes_seen`: list of target classes actually observed in training\n * `print_tree`: alternative method to print the fitted tree, with single argument the tree depth; interpretation requires internal integer-class encoding (see \"Fitted parameters\" above).\n * `features`: the names of the features encountered in training, in an order consistent with the output of `print_tree` (see below)\n\n# Accessor functions\n\n * `feature_importances(mach)` returns a vector of `(feature::Symbol => importance)` pairs; the type of importance is determined by the hyperparameter `feature_importance` (see above)\n\n# Examples\n\n```\nusing MLJ\nDecisionTreeClassifier = @load DecisionTreeClassifier pkg=DecisionTree\nmodel = DecisionTreeClassifier(max_depth=3, min_samples_split=3)\n\nX, y = @load_iris\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"virginica\" class\n\njulia> tree = fitted_params(mach).tree\npetal_length < 2.45\n├─ setosa (50/50)\n└─ petal_width < 1.75\n ├─ petal_length < 4.95\n │ ├─ versicolor (47/48)\n │ └─ virginica (4/6)\n └─ petal_length < 4.85\n ├─ virginica (2/3)\n └─ virginica (43/43)\n\nusing Plots, TreeRecipe\nplot(tree) # for a graphical representation of the tree\n\nfeature_importances(mach)\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.DecisionTreeClassifier`](@ref).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/bensadeghi/DecisionTree.jl" +":package_name" = "DecisionTree" +":name" = "DecisionTreeClassifier" ":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBase" -":package_license" = "MIT" -":load_path" = "MLJBase.TransformedTargetModel" -":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -":package_url" = "https://github.com/JuliaAI/MLJBase.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. 
If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(y))\n```\n""" -":name" = "TransformedTargetModel" -":human_name" = "transformed target model probabilistic" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" -":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Any\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows"] ":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`TransformedTargetModel`" - -[MultivariateStats.LDA] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[MLJDecisionTreeInterface.RandomForestRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" +":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJDecisionTreeInterface.RandomForestRegressor" +":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :n_trees, :sampling_fraction, :feature_importance, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "CART random forest regressor" +":is_supervised" = "`true`" +":iteration_parameter" = ":n_trees" +":docstring" = """```\nRandomForestRegressor\n```\n\nA model type for constructing a CART random forest regressor, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomForestRegressor = @load RandomForestRegressor pkg=DecisionTree\n```\n\nDo `model = RandomForestRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomForestRegressor(max_depth=...)`.\n\n`DecisionTreeRegressor` implements the standard [Random Forest algorithm](https://en.wikipedia.org/wiki/Random_forest), originally published in Breiman, L. (2001): \"Random Forests.\", *Machine Learning*, vol. 45, pp. 5–32\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: min number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=-1`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `n_trees=10`: number of trees to train\n * `sampling_fraction=0.7` fraction of samples to train each tree on\n * `feature_importance`: method to use for computing feature importances. 
One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `forest`: the `Ensemble` object returned by the core DecisionTree.jl algorithm\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features`: the names of the features encountered in training\n\n# Accessor functions\n\n * `feature_importances(mach)` returns a vector of `(feature::Symbol => importance)` pairs; the type of importance is determined by the hyperparameter `feature_importance` (see above)\n\n# Examples\n\n```\nusing MLJ\nForest = @load RandomForestRegressor pkg=DecisionTree\nforest = Forest(max_depth=4, min_samples_split=3)\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(forest, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n\nfitted_params(mach).forest # raw `Ensemble` object from DecisionTree.jl\nfeature_importances(mach)\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.RandomForestRegressor`](@ref).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/bensadeghi/DecisionTree.jl" +":package_name" = "DecisionTree" +":name" = "RandomForestRegressor" ":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" -":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.LDA" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLDA\n```\n\nA model type for constructing a linear discriminant analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLDA = @load LDA pkg=MultivariateStats\n```\n\nDo `model = LDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LDA(method=...)`.\n\n[Multiclass linear discriminant analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection in a space of features to a lower dimensional space, in a way that attempts to preserve as much as possible the degree to which the classes of a discrete target variable can be discriminated. This can be used either for dimension reduction of the features (see `transform` below) or for probabilistic classification of the target (see `predict` below).\n\nIn the case of prediction, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. 
Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: The solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nLDA = @load LDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = LDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" -":name" = "LDA" -":human_name" = "linear discriminant analysis model" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :dist)`" -":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Distances.SemiMetric\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":update"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" -[MultivariateStats.MultitargetLinearRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[MLJDecisionTreeInterface.RandomForestClassifier] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" +":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" +":tags" = [] +":abstract_type" = 
"`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.MultitargetLinearRegressor" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJDecisionTreeInterface.RandomForestClassifier" +":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :n_trees, :sampling_fraction, :feature_importance, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "CART random forest classifier" +":is_supervised" = "`true`" +":iteration_parameter" = ":n_trees" +":docstring" = """```\nRandomForestClassifier\n```\n\nA model type for constructing a CART random forest classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomForestClassifier = @load RandomForestClassifier pkg=DecisionTree\n```\n\nDo `model = RandomForestClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomForestClassifier(max_depth=...)`.\n\n`RandomForestClassifier` implements the standard [Random Forest algorithm](https://en.wikipedia.org/wiki/Random_forest), originally published in Breiman, L. (2001): \"Random Forests.\", *Machine Learning*, vol. 45, pp. 5–32.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: min number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=-1`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `n_trees=10`: number of trees to train\n * `sampling_fraction=0.7` fraction of samples to train each tree on\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `forest`: the `Ensemble` object returned by the core DecisionTree.jl algorithm\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features`: the names of the features encountered in training\n\n# Accessor functions\n\n * `feature_importances(mach)` returns a vector of `(feature::Symbol => importance)` pairs; the type of importance is determined by the hyperparameter `feature_importance` (see above)\n\n# Examples\n\n```\nusing MLJ\nForest = @load RandomForestClassifier pkg=DecisionTree\nforest = Forest(min_samples_split=6, n_subfeatures=3)\n\nX, y = @load_iris\nmach = machine(forest, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"virginica\" class\n\nfitted_params(mach).forest # raw `Ensemble` object from DecisionTree.jl\n\nfeature_importances(mach) # `:impurity` feature importances\nforest.feature_importance = :split\nfeature_importances(mach) # `:split` feature importances\n\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.RandomForestClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/bensadeghi/DecisionTree.jl" +":package_name" = "DecisionTree" +":name" = "RandomForestClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMultitargetLinearRegressor\n```\n\nA model type for constructing a multitarget linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetLinearRegressor(bias=...)`.\n\n`MultitargetLinearRegressor` assumes the target variable is vector-valued with continuous components. It trains a linear prediction function using the least squares algorithm. 
Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 9; n_targets = 2) # a table and a table (synthetic data)\n\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 9)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" -":name" = "MultitargetLinearRegressor" -":human_name" = "multitarget linear regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:bias,)`" -":hyperparameter_types" = "`(\"Bool\",)`" -":hyperparameter_ranges" = "`(nothing,)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":update"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MultivariateStats.BayesianSubspaceLDA] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" -":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.BayesianSubspaceLDA" -":package_uuid" = 
"6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" ":is_wrapper" = "`false`" -":supports_weights" = "`false`" + +[MLJBase.Pipeline] +":constructor" = "`Pipeline`" +":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" +":package_uuid" = "unknown" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":fit_data_scitype" = "`Tuple{}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJBase.Pipeline" +":hyperparameters" = "`(:named_components, :cache)`" +":is_pure_julia" = "`false`" +":human_name" = "static pipeline" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nPipeline(component1, component2, ... , componentk; options...)\nPipeline(name1=component1, name2=component2, ..., namek=componentk; options...)\ncomponent1 |> component2 |> ... |> componentk\n```\n\nCreate an instance of a composite model type which sequentially composes the specified components in order. This means `component1` receives inputs, whose output is passed to `component2`, and so forth. A \"component\" is either a `Model` instance, a model type (converted immediately to its default instance) or any callable object. Here the \"output\" of a model is what `predict` returns if it is `Supervised`, or what `transform` returns if it is `Unsupervised`.\n\nNames for the component fields are automatically generated unless explicitly specified, as in\n\n```julia\nPipeline(encoder=ContinuousEncoder(drop_last=false),\n stand=Standardizer())\n```\n\nThe `Pipeline` constructor accepts keyword `options` discussed further below.\n\nOrdinary functions (and other callables) may be inserted in the pipeline as shown in the following example:\n\n```\nPipeline(X->coerce(X, :age=>Continuous), OneHotEncoder, ConstantClassifier)\n```\n\n### Syntactic sugar\n\nThe `|>` operator is overloaded to construct pipelines out of models, callables, and existing pipelines:\n\n```julia\nLinearRegressor = @load LinearRegressor pkg=MLJLinearModels add=true\nPCA = @load PCA pkg=MultivariateStats add=true\n\npipe1 = MLJBase.table |> ContinuousEncoder |> Standardizer\npipe2 = PCA |> LinearRegressor\npipe1 |> pipe2\n```\n\nAt most one of the components may be a supervised model, but this model can appear in any position. A pipeline with a `Supervised` component is itself `Supervised` and implements the `predict` operation. It is otherwise `Unsupervised` (possibly `Static`) and implements `transform`.\n\n### Special operations\n\nIf all the `components` are invertible unsupervised models (ie, implement `inverse_transform`) then `inverse_transform` is implemented for the pipeline. If there are no supervised models, then `predict` is nevertheless implemented, assuming the last component is a model that implements it (some clustering models). Similarly, calling `transform` on a supervised pipeline calls `transform` on the supervised component.\n\n### Transformers that need a target in training\n\nSome transformers that have type `Unsupervised` (so that the output of `transform` is propagated in pipelines) may require a target variable for training. An example are so-called target encoders (which transform categorical input features, based on some target observations). 
Provided they appear before any `Supervised` component in the pipeline, such models are supported. Of course a target must be provided whenever training such a pipeline, whether or not it contains a `Supervised` component.\n\n### Optional key-word arguments\n\n * `prediction_type` - prediction type of the pipeline; possible values: `:deterministic`, `:probabilistic`, `:interval` (default=`:deterministic` if not inferable)\n * `operation` - operation applied to the supervised component model, when present; possible values: `predict`, `predict_mean`, `predict_median`, `predict_mode` (default=`predict`)\n * `cache` - whether the internal machines created for component models should cache model-specific representations of data (see [`machine`](@ref)) (default=`true`)\n\n!!! warning\n Set `cache=false` to guarantee data anonymization.\n\n\nTo build more complicated non-branching pipelines, refer to the MLJ manual sections on composing models.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "unknown" +":package_name" = "MLJBase" +":name" = "Pipeline" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nBayesianSubspaceLDA\n```\n\nA model type for constructing a Bayesian subspace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianSubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianSubspaceLDA(normalize=...)`.\n\nThe Bayesian multiclass subspace linear discriminant analysis algorithm learns a projection matrix as described in [`SubspaceLDA`](@ref). The posterior class probability distribution is derived as in [`BayesianLDA`](@ref).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n * `outdim`: the output dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. 
Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space, i.e., the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The overall mean of the training data.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. 
Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```\nusing MLJ\n\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianSubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref)\n""" -":name" = "BayesianSubspaceLDA" -":human_name" = "Bayesian subspace LDA model" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:normalize, :outdim, :priors)`" -":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`true`" -[MultivariateStats.FactorAnalysis] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" +[MLJBase.Resampler] +":constructor" = "`MLJBase.Resampler`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict{<:Any, <:Real}}\", \"Any\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Int64\", \"Bool\", \"Bool\", \"Any\", \"Bool\")`" +":package_uuid" = "unknown" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`Any`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJBase.Resampler" +":hyperparameters" = "`(:model, :resampling, :measure, :weights, :class_weights, :operation, :acceleration, :check_measure, :repeats, :cache, :per_observation, :logger, :compact)`" ":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" -":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.FactorAnalysis" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" 
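The `MLJBase.Resampler` entry begun above records hyper-parameters (`resampling`, `measure`, `operation`, `repeats`, ...) that, as its docstring below notes, ordinary users reach through `evaluate`/`evaluate!` rather than through the private `Resampler` wrapper itself. A minimal sketch of that public route, assuming MLJ and the DecisionTree interface package are installed (the model, data, and measure choices here are illustrative only, not part of the registry):

```julia
using MLJ

# Any registered probabilistic classifier would do; DecisionTree's is illustrative.
Tree = @load DecisionTreeClassifier pkg=DecisionTree verbosity=0

X, y = @load_iris

# `evaluate` accepts the same `resampling`, `measure`, `operation`, and `repeats`
# options recorded in the `Resampler` entry above:
evaluate(Tree(), X, y,
         resampling=CV(nfolds=5, shuffle=true, rng=123),
         measure=log_loss,
         verbosity=0)
```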
-":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nFactorAnalysis\n```\n\nA model type for constructing a factor analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n```\n\nDo `model = FactorAnalysis()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FactorAnalysis(method=...)`.\n\nFactor analysis is a linear-Gaussian latent variable model that is closely related to probabilistic PCA. In contrast to the probabilistic PCA model, the covariance of conditional distribution of the observed variable given the latent variable is diagonal rather than isotropic.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:cm`: Method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `maxiter::Int=1000`: Maximum number of iterations.\n * `tol::Real=1e-6`: Convergence tolerance.\n * `eta::Real=tol`: Variance lower bound.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively. Each column of the projection matrix corresponds to a factor.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data (number of factors).\n * `variance`: The variance of the factors.\n * `covariance_matrix`: The estimated covariance matrix.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `loadings`: The factor loadings. 
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = FactorAnalysis(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`PPCA`](@ref), [`PCA`](@ref)\n""" -":name" = "FactorAnalysis" -":human_name" = "factor analysis model" +":human_name" = "resampler" ":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] -":hyperparameters" = "`(:method, :maxoutdim, :maxiter, :tol, :eta, :mean)`" -":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Int64\", \"Real\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nresampler = Resampler(\n model=ConstantRegressor(),\n resampling=CV(),\n measure=nothing,\n weights=nothing,\n class_weights=nothing\n operation=predict,\n repeats = 1,\n acceleration=default_resource(),\n check_measure=true,\n per_observation=true,\n logger=default_logger(),\n compact=false,\n)\n```\n\n*Private method.* Use at own risk.\n\nResampling model wrapper, used internally by the `fit` method of `TunedModel` instances and `IteratedModel` instances. See [`evaluate!`](@ref) for meaning of the options. Not intended for use by general user, who will ordinarily use [`evaluate!`](@ref) directly.\n\nGiven a machine `mach = machine(resampler, args...)` one obtains a performance evaluation of the specified `model`, performed according to the prescribed `resampling` strategy and other parameters, using data `args...`, by calling `fit!(mach)` followed by `evaluate(mach)`.\n\nOn subsequent calls to `fit!(mach)` new train/test pairs of row indices are only regenerated if `resampling`, `repeats` or `cache` fields of `resampler` have changed. The evolution of an RNG field of `resampler` does *not* constitute a change (`==` for `MLJType` objects is not sensitive to such changes; see [`is_same_except`](@ref)).\n\nIf there is single train/test pair, then warm-restart behavior of the wrapped model `resampler.model` will extend to warm-restart behaviour of the wrapper `resampler`, with respect to mutations of the wrapped model.\n\nThe sample `weights` are passed to the specified performance measures that support weights for evaluation. These weights are not to be confused with any weights bound to a `Resampler` instance in a machine, used for training the wrapped `model` when supported.\n\nThe sample `class_weights` are passed to the specified performance measures that support per-class weights for evaluation. 
These weights are not to be confused with any weights bound to a `Resampler` instance in a machine, used for training the wrapped `model` when supported.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "unknown" +":package_name" = "MLJBase" +":name" = "Resampler" +":target_in_fit" = "`false`" +":supports_class_weights" = "`missing`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":evaluate", ":fit", ":fitted_params", ":update"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`missing`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`true`" -[MultivariateStats.LinearRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[MLJBase.Stack] +":constructor" = "`MLJBase.Stack`" +":hyperparameter_types" = "`(\"Vector{MLJModelInterface.Supervised}\", \"MLJModelInterface.Probabilistic\", \"Any\", \"Union{Nothing, AbstractVector}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" +":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJBase.Stack" +":hyperparameters" = "`(:models, :metalearner, :resampling, :measures, :cache, :acceleration)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic stack" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nUnion{Types...}\n```\n\nA `Union` type is an abstract type which includes all instances of any of its argument types. This means that `T <: Union{T,S}` and `S <: Union{T,S}`.\n\nLike other abstract types, it cannot be instantiated, even if all of its arguments are non abstract.\n\n# Examples\n\n```jldoctest\njulia> IntOrString = Union{Int,AbstractString}\nUnion{Int64, AbstractString}\n\njulia> 1 isa IntOrString # instance of Int is included in the union\ntrue\n\njulia> \"Hello!\" isa IntOrString # String is also included\ntrue\n\njulia> 1.0 isa IntOrString # Float64 is not included because it is neither Int nor AbstractString\nfalse\n```\n\n# Extended Help\n\nUnlike most other parametric types, unions are covariant in their parameters. 
For example, `Union{Real, String}` is a subtype of `Union{Number, AbstractString}`.\n\nThe empty union [`Union{}`](@ref) is the bottom type of Julia.\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJBase.jl" +":package_name" = "MLJBase" +":name" = "Stack" ":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" -":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.LinearRegressor" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(bias=...)`.\n\n`LinearRegressor` assumes the target is a `Continuous` variable and trains a linear prediction function using the least squares algorithm. Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 2) # a table and a vector (synthetic data)\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" -":name" = "LinearRegressor" -":human_name" = "linear regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:bias,)`" -":hyperparameter_types" = "`(\"Bool\",)`" -":hyperparameter_ranges" = "`(nothing,)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = 
"`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`true`" -[MultivariateStats.ICA] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" +[MLJBase.TransformedTargetModel] +":constructor" = "`TransformedTargetModel`" +":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Any\", \"Any\", \"Any\")`" +":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.ICA" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJBase.TransformedTargetModel" +":hyperparameters" = "`(:model, :transformer, :inverse, :cache)`" +":is_pure_julia" = "`false`" +":human_name" = "transformed target model probabilistic" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nTransformedTargetModel(model; transformer=nothing, inverse=nothing, cache=true)\n```\n\nWrap the supervised or semi-supervised `model` in a transformation of the target variable.\n\nHere `transformer` one of the following:\n\n * The `Unsupervised` model that is to transform the training target. By default (`inverse=nothing`) the parameters learned by this transformer are also used to inverse-transform the predictions of `model`, which means `transformer` must implement the `inverse_transform` method. If this is not the case, specify `inverse=identity` to suppress inversion.\n * A callable object for transforming the target, such as `y -> log.(y)`. In this case a callable `inverse`, such as `z -> exp.(z)`, should be specified.\n\nSpecify `cache=false` to prioritize memory over speed, or to guarantee data anonymity.\n\nSpecify `inverse=identity` if `model` is a probabilistic predictor, as inverse-transforming sample spaces is not supported. 
Alternatively, replace `model` with a deterministic model, such as `Pipeline(model, y -> mode.(y))`.\n\n### Examples\n\nA model that normalizes the target before applying ridge regression, with predictions returned on the original scale:\n\n```julia\n@load RidgeRegressor pkg=MLJLinearModels\nmodel = RidgeRegressor()\ntmodel = TransformedTargetModel(model, transformer=Standardizer())\n```\n\nA model that applies a static `log` transformation to the data, again returning predictions to the original scale:\n\n```julia\ntmodel2 = TransformedTargetModel(model, transformer=y->log.(y), inverse=z->exp.(y))\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJBase.jl" +":package_name" = "MLJBase" +":name" = "TransformedTargetModel" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nICA\n```\n\nA model type for constructing a independent component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nICA = @load ICA pkg=MultivariateStats\n```\n\nDo `model = ICA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ICA(outdim=...)`.\n\nIndependent component analysis is a computational technique for separating a multivariate signal into additive subcomponents, with the assumption that the subcomponents are non-Gaussian and independent from each other.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `outdim::Int=0`: The number of independent components to recover, set automatically if `0`.\n * `alg::Symbol=:fastica`: The algorithm to use (only `:fastica` is supported at the moment).\n * `fun::Symbol=:tanh`: The approximate neg-entropy function, one of `:tanh`, `:gaus`.\n * `do_whiten::Bool=true`: Whether or not to perform pre-whitening.\n * `maxiter::Int=100`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance for change in the unmixing matrix W.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: mean to use, if nothing (default) centering is computed and applied, if zero, no centering; otherwise a vector of means can be passed.\n * `winit::Union{Nothing,Matrix{<:Real}}=nothing`: Initial guess for the unmixing matrix `W`: either an empty matrix (for random initialization of `W`), a matrix of size `m × k` (if `do_whiten` is true), or a matrix of size `m × k`. 
Here `m` is the number of components (columns) of the input.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return the component-separated version of input `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: The estimated component matrix.\n * `mean`: The estimated mean vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n\n# Examples\n\n```\nusing MLJ\n\nICA = @load ICA pkg=MultivariateStats\n\ntimes = range(0, 8, length=2000)\n\nsine_wave = sin.(2*times)\nsquare_wave = sign.(sin.(3*times))\nsawtooth_wave = map(t -> mod(2t, 2) - 1, times)\nsignals = hcat(sine_wave, square_wave, sawtooth_wave)\nnoisy_signals = signals + 0.2*randn(size(signals))\n\nmixing_matrix = [ 1 1 1; 0.5 2 1; 1.5 1 2]\nX = MLJ.table(noisy_signals*mixing_matrix)\n\nmodel = ICA(outdim = 3, tol=0.1)\nmach = machine(model, X) |> fit!\n\nX_unmixed = transform(mach, X)\n\nusing Plots\n\nplot(X.x2)\nplot(X.x2)\nplot(X.x3)\n\nplot(X_unmixed.x1)\nplot(X_unmixed.x2)\nplot(X_unmixed.x3)\n\n```\n\nSee also [`PCA`](@ref), [`KernelPCA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" -":name" = "ICA" -":human_name" = "independent component analysis model" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] -":hyperparameters" = "`(:outdim, :alg, :fun, :do_whiten, :maxiter, :tol, :winit, :mean)`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Symbol\", \"Bool\", \"Int64\", \"Real\", \"Union{Nothing, Matrix{<:Real}}\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`true`" -[MultivariateStats.PPCA] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" -":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.PPCA" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = 
"https://github.com/JuliaStats/MultivariateStats.jl" +[MLJClusteringInterface.HierarchicalClustering] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nPPCA\n```\n\nA model type for constructing a probabilistic PCA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPPCA = @load PPCA pkg=MultivariateStats\n```\n\nDo `model = PPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PPCA(maxoutdim=...)`.\n\nProbabilistic principal component analysis is a dimension-reduction algorithm which represents a constrained form of the Gaussian distribution in which the number of free parameters can be restricted while still allowing the model to capture the dominant correlations in a data set. It is expressed as the maximum likelihood solution of a probabilistic latent variable model. For details, see Bishop (2006): C. M. Pattern Recognition and Machine Learning.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `method::Symbol=:ml`: The method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxiter::Int=1000`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively. Each column of the projection matrix corresponds to a principal component.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `tvat`: The variance of the components.\n * `loadings`: The model's loadings matrix. 
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` as as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nPPCA = @load PPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PPCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PCA`](@ref)\n""" -":name" = "PPCA" -":human_name" = "probabilistic PCA model" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] -":hyperparameters" = "`(:maxoutdim, :method, :maxiter, :tol, :mean)`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":hyperparameter_types" = "`(\"Symbol\", \"Distances.SemiMetric\", \"Symbol\", \"Union{Nothing, Float64}\", \"Int64\")`" +":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`(:predict,)`" +":fit_data_scitype" = "`Tuple{}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJClusteringInterface.HierarchicalClustering" +":hyperparameters" = "`(:linkage, :metric, :branchorder, :h, :k)`" +":is_pure_julia" = "`true`" +":human_name" = "hierarchical clusterer" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nHierarchicalClustering\n```\n\nA model type for constructing a hierarchical clusterer, based on [Clustering.jl](https://github.com/JuliaStats/Clustering.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nHierarchicalClustering = @load HierarchicalClustering pkg=Clustering\n```\n\nDo `model = HierarchicalClustering()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `HierarchicalClustering(linkage=...)`.\n\n[Hierarchical Clustering](https://en.wikipedia.org/wiki/Hierarchical_clustering) is a clustering algorithm that organizes the data in a dendrogram based on distances between groups of points and computes cluster assignments by cutting the dendrogram at a given height. More information is available at the [Clustering.jl documentation](https://juliastats.org/Clustering.jl/stable/index.html). Use `predict` to get cluster assignments. The dendrogram and the dendrogram cutter are accessed from the machine report (see below).\n\nThis is a static implementation, i.e., it does not generalize to new data instances, and there is no training data. 
For clusterers that do generalize, see [`KMeans`](@ref) or [`KMedoids`](@ref).\n\nIn MLJ or MLJBase, create a machine with\n\n```\nmach = machine(model)\n```\n\n# Hyper-parameters\n\n * `linkage = :single`: linkage method (:single, :average, :complete, :ward, :ward_presquared)\n * `metric = SqEuclidean`: metric (see `Distances.jl` for available metrics)\n * `branchorder = :r`: branchorder (:r, :barjoseph, :optimal)\n * `h = nothing`: height at which the dendrogram is cut\n * `k = 3`: number of clusters.\n\nIf both `k` and `h` are specified, it is guaranteed that the number of clusters is not less than `k` and their height is not above `h`.\n\n# Operations\n\n * `predict(mach, X)`: return cluster label assignments, as an unordered `CategoricalVector`. Here `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\n# Report\n\nAfter calling `predict(mach)`, the fields of `report(mach)` are:\n\n * `dendrogram`: the dendrogram that was computed when calling `predict`.\n * `cutter`: a dendrogram cutter that can be called with a height `h` or a number of clusters `k`, to obtain a new assignment of the data points to clusters (see example below).\n\n# Examples\n\n```julia\nusing MLJ\n\nX, labels = make_moons(400, noise=0.09, rng=1) # synthetic data with 2 clusters; X\n\nHierarchicalClustering = @load HierarchicalClustering pkg=Clustering\nmodel = HierarchicalClustering(linkage = :complete)\nmach = machine(model)\n\n# compute and output cluster assignments for observations in `X`:\nyhat = predict(mach, X)\n\n# plot dendrogram:\nusing StatsPlots\nplot(report(mach).dendrogram)\n\n# make new predictions by cutting the dendrogram at another height\nreport(mach).cutter(h = 2.5)\n```\n""" +":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":package_url" = "https://github.com/JuliaStats/Clustering.jl" +":package_name" = "Clustering" +":name" = "HierarchicalClustering" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MultivariateStats.RidgeRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[MLJClusteringInterface.DBSCAN] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Real\", \"Int64\", \"Int64\", \"Int64\")`" +":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`(:predict,)`" +":fit_data_scitype" = "`Tuple{}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" 
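Both `HierarchicalClustering` above and `DBSCAN` (whose entry continues below) are `Static` models, so the machine is constructed without training data and `predict` is applied to the features directly. A minimal sketch of this workflow, assuming Clustering.jl's MLJ interface is installed (the data and hyper-parameter values are illustrative only, not part of the registry):

```julia
using MLJ

HC = @load HierarchicalClustering pkg=Clustering verbosity=0

X, _ = make_moons(200, noise=0.1, rng=1)  # synthetic table of Continuous features

mach = machine(HC(linkage=:complete))  # static model: no training data is bound
yhat = predict(mach, X)                # cluster assignments for the rows of `X`

# Recut the dendrogram computed by `predict` without recomputing it:
report(mach).cutter(k=4)
```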
-":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.RidgeRegressor" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":prediction_type" = ":unknown" +":load_path" = "MLJClusteringInterface.DBSCAN" +":hyperparameters" = "`(:radius, :leafsize, :min_neighbors, :min_cluster_size)`" +":is_pure_julia" = "`true`" +":human_name" = "DBSCAN clusterer (density-based spatial clustering of applications with noise)" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nDBSCAN\n```\n\nA model type for constructing a DBSCAN clusterer (density-based spatial clustering of applications with noise), based on [Clustering.jl](https://github.com/JuliaStats/Clustering.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDBSCAN = @load DBSCAN pkg=Clustering\n```\n\nDo `model = DBSCAN()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DBSCAN(radius=...)`.\n\n[DBSCAN](https://en.wikipedia.org/wiki/DBSCAN) is a clustering algorithm that groups together points that are closely packed together (points with many nearby neighbors), marking as outliers points that lie alone in low-density regions (whose nearest neighbors are too far away). More information is available at the [Clustering.jl documentation](https://juliastats.org/Clustering.jl/stable/index.html). Use `predict` to get cluster assignments. Point types - core, boundary or noise - are accessed from the machine report (see below).\n\nThis is a static implementation, i.e., it does not generalize to new data instances, and there is no training data. For clusterers that do generalize, see [`KMeans`](@ref) or [`KMedoids`](@ref).\n\nIn MLJ or MLJBase, create a machine with\n\n```\nmach = machine(model)\n```\n\n# Hyper-parameters\n\n * `radius=1.0`: query radius.\n * `leafsize=20`: number of points binned in each leaf node of the nearest neighbor k-d tree.\n * `min_neighbors=1`: minimum number of a core point neighbors.\n * `min_cluster_size=1`: minimum number of points in a valid cluster.\n\n# Operations\n\n * `predict(mach, X)`: return cluster label assignments, as an unordered `CategoricalVector`. Here `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Note that points of type `noise` will always get a label of `0`.\n\n# Report\n\nAfter calling `predict(mach)`, the fields of `report(mach)` are:\n\n * `point_types`: A `CategoricalVector` with the DBSCAN point type classification, one element per row of `X`. 
Elements are either `'C'` (core), `'B'` (boundary), or `'N'` (noise).\n * `nclusters`: The number of clusters (excluding the noise \"cluster\")\n * `cluster_labels`: The unique list of cluster labels\n * `clusters`: A vector of `Clustering.DbscanCluster` objects from Clustering.jl, which have these fields:\n\n * `size`: number of points in a cluster (core + boundary)\n * `core_indices`: indices of points in the cluster core\n * `boundary_indices`: indices of points on the cluster boundary\n\n# Examples\n\n```julia\nusing MLJ\n\nX, labels = make_moons(400, noise=0.09, rng=1) # synthetic data with 2 clusters; X\ny = map(labels) do label\n label == 0 ? \"cookie\" : \"monster\"\nend;\ny = coerce(y, Multiclass);\n\nDBSCAN = @load DBSCAN pkg=Clustering\nmodel = DBSCAN(radius=0.13, min_cluster_size=5)\nmach = machine(model)\n\n# compute and output cluster assignments for observations in `X`:\nyhat = predict(mach, X)\n\n# get DBSCAN point types:\nreport(mach).point_types\nreport(mach).nclusters\n\n# compare cluster labels with actual labels:\ncompare = zip(yhat, y) |> collect;\ncompare[1:10] # clusters align with classes\n\n# visualize clusters, noise in red:\npoints = zip(X.x1, X.x2) |> collect\ncolors = map(yhat) do i\n i == 0 ? :red :\n i == 1 ? :blue :\n i == 2 ? :green :\n i == 3 ? :yellow :\n :black\nend\nusing Plots\nscatter(points, color=colors)\n```\n""" +":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":package_url" = "https://github.com/JuliaStats/Clustering.jl" +":package_name" = "Clustering" +":name" = "DBSCAN" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RidgeRegressor(lambda=...)`.\n\n`RidgeRegressor` adds a quadratic penalty term to least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. 
If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\npipe = Standardizer() |> RidgeRegressor(lambda=10)\n\nX, y = @load_boston\n\nmach = machine(pipe, X, y) |> fit!\nyhat = predict(mach, X)\ntraining_error = l1(yhat, y) |> mean\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" -":name" = "RidgeRegressor" -":human_name" = "ridge regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:lambda, :bias)`" -":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MultivariateStats.KernelPCA] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +[MLJClusteringInterface.KMeans] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Distances.SemiMetric\", \"Any\")`" +":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJClusteringInterface.KMeans" +":hyperparameters" = "`(:k, :metric, :init)`" +":is_pure_julia" = "`true`" +":human_name" = "K-means clusterer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nKMeans\n```\n\nA model type for constructing a K-means clusterer, based on [Clustering.jl](https://github.com/JuliaStats/Clustering.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can 
be imported using\n\n```\nKMeans = @load KMeans pkg=Clustering\n```\n\nDo `model = KMeans()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KMeans(k=...)`.\n\n[K-means](http://en.wikipedia.org/wiki/K_means) is a classical method for clustering or vector quantization. It produces a fixed number of clusters, each associated with a *center* (also known as a *prototype*), and each data point is assigned to a cluster with the nearest center.\n\nFrom a mathematical standpoint, K-means is a coordinate descent algorithm that solves the following optimization problem:\n\n$$\n\\text{minimize} \\ \\sum_{i=1}^n \\| \\mathbf{x}_i - \\boldsymbol{\\mu}_{z_i} \\|^2 \\ \\text{w.r.t.} \\ (\\boldsymbol{\\mu}, z)\n$$\n\nHere, $\\boldsymbol{\\mu}_k$ is the center of the $k$-th cluster, and $z_i$ is an index of the cluster for $i$-th point $\\mathbf{x}_i$.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=3`: The number of centroids to use in clustering.\n * `metric::SemiMetric=Distances.SqEuclidean`: The metric used to calculate the clustering. Must have type `PreMetric` from Distances.jl.\n * `init = :kmpp`: One of the following options to indicate how cluster seeds should be initialized:\n\n * `:kmpp`: KMeans++\n * `:kmenc`: K-medoids initialization based on centrality\n * `:rand`: random\n * an instance of `Clustering.SeedingAlgorithm` from Clustering.jl\n * an integer vector of length `k` that provides the indices of points to use as initial cluster centers.\n\n See [documentation of Clustering.jl](https://juliastats.org/Clustering.jl/stable/kmeans.html#Clustering.kmeans).\n\n# Operations\n\n * `predict(mach, Xnew)`: return cluster label assignments, given new features `Xnew` having the same Scitype as `X` above.\n * `transform(mach, Xnew)`: instead return the mean pairwise distances from new samples to the cluster centers.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `centers`: The coordinates of the cluster centers.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `assignments`: The cluster assignments of each point in the training data.\n * `cluster_labels`: The labels assigned to each cluster.\n\n# Examples\n\n```julia\nusing MLJ\nKMeans = @load KMeans pkg=Clustering\n\ntable = load_iris()\ny, X = unpack(table, ==(:target), rng=123)\nmodel = KMeans(k=3)\nmach = machine(model, X) |> fit!\n\nyhat = predict(mach, X)\n@assert yhat == report(mach).assignments\n\ncompare = zip(yhat, y) |> collect;\ncompare[1:8] # clusters align with classes\n\ncenter_dists = transform(mach, fitted_params(mach).centers')\n\n@assert center_dists[1][1] == 0.0\n@assert center_dists[2][2] == 0.0\n@assert center_dists[3][3] == 0.0\n```\n\nSee also [`KMedoids`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/Clustering.jl" +":package_name" = "Clustering" +":name" = "KMeans" ":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" -":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.KernelPCA" -":package_uuid" = 
"6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nKernelPCA\n```\n\nA model type for constructing a kernel prinicipal component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n```\n\nDo `model = KernelPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KernelPCA(maxoutdim=...)`.\n\nIn kernel PCA the linear operations of ordinary principal component analysis are performed in a [reproducing Hilbert space](https://en.wikipedia.org/wiki/Reproducing_kernel_Hilbert_space).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `kernel::Function=(x,y)->x'y`: The kernel function, takes in 2 vector arguments x and y, returns a scalar value. Defaults to the dot product of `x` and `y`.\n * `solver::Symbol=:eig`: solver to use for the eigenvalues, one of `:eig`(default, uses `LinearAlgebra.eigen`), `:eigs`(uses `Arpack.eigs`).\n * `inverse::Bool=true`: perform calculations needed for inverse transform\n * `beta::Real=1.0`: strength of the ridge regression that learns the inverse transform when inverse is true.\n * `tol::Real=0.0`: Convergence tolerance for eigenvalue solver.\n * `maxiter::Int=300`: maximum number of iterations for eigenvalue solver.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `principalvars`: The variance of the principal components.\n\n# Examples\n\n```\nusing MLJ\nusing LinearAlgebra\n\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nfunction rbf_kernel(length_scale)\n return (x,y) -> norm(x-y)^2 / ((2 * length_scale)^2)\nend\n\nmodel = KernelPCA(maxoutdim=2, kernel=rbf_kernel(1))\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`PCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" -":name" = "KernelPCA" -":human_name" = "kernel prinicipal component analysis model" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] -":hyperparameters" = "`(:maxoutdim, :kernel, :solver, :inverse, :beta, :tol, :maxiter)`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Function}\", \"Symbol\", \"Bool\", \"Real\", \"Real\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[MultivariateStats.MultitargetRidgeRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[MLJClusteringInterface.AffinityPropagation] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Float64\", \"Union{Nothing, Float64}\", \"Distances.SemiMetric\")`" +":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`(:predict,)`" +":fit_data_scitype" = "`Tuple{}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = 
"`true`" -":package_name" = "MultivariateStats" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.MultitargetRidgeRegressor" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":prediction_type" = ":unknown" +":load_path" = "MLJClusteringInterface.AffinityPropagation" +":hyperparameters" = "`(:damp, :maxiter, :tol, :preference, :metric)`" +":is_pure_julia" = "`true`" +":human_name" = "Affinity Propagation clusterer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nAffinityPropagation\n```\n\nA model type for constructing a Affinity Propagation clusterer, based on [Clustering.jl](https://github.com/JuliaStats/Clustering.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nAffinityPropagation = @load AffinityPropagation pkg=Clustering\n```\n\nDo `model = AffinityPropagation()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `AffinityPropagation(damp=...)`.\n\n[Affinity Propagation](https://en.wikipedia.org/wiki/Affinity_propagation) is a clustering algorithm based on the concept of \"message passing\" between data points. More information is available at the [Clustering.jl documentation](https://juliastats.org/Clustering.jl/stable/index.html). Use `predict` to get cluster assignments. Indices of the exemplars, their values, etc, are accessed from the machine report (see below).\n\nThis is a static implementation, i.e., it does not generalize to new data instances, and there is no training data. For clusterers that do generalize, see [`KMeans`](@ref) or [`KMedoids`](@ref).\n\nIn MLJ or MLJBase, create a machine with\n\n```\nmach = machine(model)\n```\n\n# Hyper-parameters\n\n * `damp = 0.5`: damping factor\n * `maxiter = 200`: maximum number of iteration\n * `tol = 1e-6`: tolerance for converenge\n * `preference = nothing`: the (single float) value of the diagonal elements of the similarity matrix. If unspecified, choose median (negative) similarity of all pairs as mentioned [here](https://en.wikipedia.org/wiki/Affinity_propagation#Algorithm)\n * `metric = Distances.SqEuclidean()`: metric (see `Distances.jl` for available metrics)\n\n# Operations\n\n * `predict(mach, X)`: return cluster label assignments, as an unordered `CategoricalVector`. 
Here `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\n# Report\n\nAfter calling `predict(mach)`, the fields of `report(mach)` are:\n\n * exemplars: indices of the data picked as exemplars in `X`\n * centers: positions of the exemplars in the feature space\n * cluster_labels: labels of clusters given to each datum in `X`\n * iterations: the number of iteration run by the algorithm\n * converged: whether or not the algorithm converges by the maximum iteration\n\n# Examples\n\n```\nusing MLJ\n\nX, labels = make_moons(400, noise=0.9, rng=1)\n\nAffinityPropagation = @load AffinityPropagation pkg=Clustering\nmodel = AffinityPropagation(preference=-10.0)\nmach = machine(model)\n\n# compute and output cluster assignments for observations in `X`:\nyhat = predict(mach, X)\n\n# Get the positions of the exemplars\nreport(mach).centers\n\n# Plot clustering result\nusing GLMakie\nscatter(MLJ.matrix(X)', color=yhat.refs)\n```\n""" +":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":package_url" = "https://github.com/JuliaStats/Clustering.jl" +":package_name" = "Clustering" +":name" = "AffinityPropagation" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMultitargetRidgeRegressor\n```\n\nA model type for constructing a multitarget ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetRidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetRidgeRegressor(lambda=...)`.\n\nMulti-target ridge regression adds a quadratic penalty term to multi-target least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. In this case, the output represents a response vector. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. 
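For illustration, a minimal sketch of varying `lambda` between the limits described next, using MLJ's `make_regression` as in the Examples section below (the `lambda` values here are illustrative only):

```julia
using MLJ

MultitargetRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats

X, y = make_regression(100, 3; n_targets=2)  # synthetic table/table pair

# lambda = 0 reduces to ordinary least squares; a large lambda shrinks
# all linear coefficients towards zero:
ols    = machine(MultitargetRidgeRegressor(lambda=0), X, y) |> fit!
shrunk = machine(MultitargetRidgeRegressor(lambda=1e6), X, y) |> fit!

fitted_params(shrunk).coefficients  # entries close to zero
```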
If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n\nX, y = make_regression(100, 6; n_targets = 2) # a table and a table (synthetic data)\n\nridge_regressor = RidgeRegressor(lambda=1.5)\nmach = machine(ridge_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 6)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref)\n""" -":name" = "MultitargetRidgeRegressor" -":human_name" = "multitarget ridge regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:lambda, :bias)`" -":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MultivariateStats.SubspaceLDA] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[MLJClusteringInterface.KMedoids] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Distances.SemiMetric\", \"Any\")`" +":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.SubspaceLDA" -":package_uuid" = 
"6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nSubspaceLDA\n```\n\nA model type for constructing a subpace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = SubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SubspaceLDA(normalize=...)`.\n\nMulticlass subspace linear discriminant analysis (LDA) is a variation on ordinary [`LDA`](@ref) suitable for high dimensional data, as it avoids storing scatter matrices. For details, refer the [MultivariateStats.jl documentation](https://juliastats.org/MultivariateStats.jl/stable/).\n\nIn addition to dimension reduction (using `transform`) probabilistic classification is provided (using `predict`). In the case of classification, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n * `outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. 
Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool)\n\n`class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```\nusing MLJ\n\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = SubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" -":name" = "SubspaceLDA" -":human_name" = "subpace LDA model" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:normalize, :outdim, :dist)`" -":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Distances.SemiMetric\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MultivariateStats.BayesianLDA] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "MLJClusteringInterface.KMedoids" +":hyperparameters" = "`(:k, :metric, :init)`" ":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" -":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.BayesianLDA" -":package_uuid" = 
"6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nBayesianLDA\n```\n\nA model type for constructing a Bayesian LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianLDA(method=...)`.\n\nThe Bayesian multiclass LDA algorithm learns a projection matrix as described in ordinary [`LDA`](@ref). Predicted class posterior probability distributions are derived by applying Bayes' rule with a multivariate Gaussian class-conditional distribution. A prior class distribution can be specified by the user or inferred from training data class frequency.\n\nSee also the [package documentation](https://multivariatestatsjl.readthedocs.io/en/latest/lda.html). For more information about the algorithm, see [Li, Zhu and Ogihara (2006): Using Discriminant Analysis for Multi-class Classification: An Experimental Investigation](https://doi.org/10.1007/s10115-006-0013-y).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: choice of solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e., dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. 
Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" -":name" = "BayesianLDA" -":human_name" = "Bayesian LDA model" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :priors)`" -":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":human_name" = "K-medoids clusterer" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MultivariateStats.PCA] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":docstring" = """```\nKMedoids\n```\n\nA model type for 
constructing a K-medoids clusterer, based on [Clustering.jl](https://github.com/JuliaStats/Clustering.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKMedoids = @load KMedoids pkg=Clustering\n```\n\nDo `model = KMedoids()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KMedoids(k=...)`.\n\n[K-medoids](http://en.wikipedia.org/wiki/K-medoids) is a clustering algorithm that works by finding $k$ data points (called *medoids*) such that the total distance between each data point and the closest *medoid* is minimal.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=3`: The number of centroids to use in clustering.\n * `metric::SemiMetric=Distances.SqEuclidean`: The metric used to calculate the clustering. Must have type `PreMetric` from Distances.jl.\n * `init` (defaults to `:kmpp`): how medoids should be initialized, could be one of the following:\n\n * `:kmpp`: KMeans++\n * `:kmenc`: K-medoids initialization based on centrality\n * `:rand`: random\n * an instance of `Clustering.SeedingAlgorithm` from Clustering.jl\n * an integer vector of length `k` that provides the indices of points to use as initial medoids.\n\n See [documentation of Clustering.jl](https://juliastats.org/Clustering.jl/stable/kmedoids.html#Clustering.kmedoids).\n\n# Operations\n\n * `predict(mach, Xnew)`: return cluster label assignments, given new features `Xnew` having the same Scitype as `X` above.\n * `transform(mach, Xnew)`: instead return the mean pairwise distances from new samples to the cluster centers.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `medoids`: The coordinates of the cluster medoids.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `assignments`: The cluster assignments of each point in the training data.\n * `cluster_labels`: The labels assigned to each cluster.\n\n# Examples\n\n```julia\nusing MLJ\nKMedoids = @load KMedoids pkg=Clustering\n\ntable = load_iris()\ny, X = unpack(table, ==(:target), rng=123)\nmodel = KMedoids(k=3)\nmach = machine(model, X) |> fit!\n\nyhat = predict(mach, X)\n@assert yhat == report(mach).assignments\n\ncompare = zip(yhat, y) |> collect;\ncompare[1:8] # clusters align with classes\n\ncenter_dists = transform(mach, fitted_params(mach).medoids')\n\n@assert center_dists[1][1] == 0.0\n@assert center_dists[2][2] == 0.0\n@assert center_dists[3][3] == 0.0\n```\n\nSee also [`KMeans`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/Clustering.jl" +":package_name" = "Clustering" +":name" = "KMedoids" ":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MultivariateStats" -":package_license" = "MIT" -":load_path" = "MLJMultivariateStatsInterface.PCA" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nPCA\n```\n\nA model type for constructing a pca, 
based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPCA = @load PCA pkg=MultivariateStats\n```\n\nDo `model = PCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PCA(maxoutdim=...)`.\n\nPrincipal component analysis learns a linear projection onto a lower dimensional space while preserving most of the initial variance seen in the training data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Together with `variance_ratio`, controls the output dimension `outdim` chosen by the model. Specifically, suppose that `k` is the smallest integer such that retaining the `k` most significant principal components accounts for `variance_ratio` of the total variance in the training data. Then `outdim = min(outdim, maxoutdim)`. If `maxoutdim=0` (default) then the effective `maxoutdim` is `min(n, indim - 1)` where `n` is the number of observations and `indim` the number of features in the training data.\n * `variance_ratio::Float64=0.99`: The ratio of variance preserved after the transformation\n * `method=:auto`: The method to use to solve the problem. Choices are\n\n * `:svd`: Support Vector Decomposition of the matrix.\n * `:cov`: Covariance matrix decomposition.\n * `:auto`: Use `:cov` if the matrices first dimension is smaller than its second dimension and otherwise use `:svd`\n * `mean=nothing`: if `nothing`, centering will be computed and applied, if set to `0` no centering (data is assumed pre-centered); if a vector is passed, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim = min(n, indim, maxoutdim)` is the output dimension; here `n` is the number of observations.\n * `tprincipalvar`: Total variance of the principal components.\n * `tresidualvar`: Total residual variance.\n * `tvar`: Total observation variance (principal + residual variance).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `principalvars`: The variance of the principal components. 
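A minimal sketch of computing an explained-variance ratio by hand from these report fields, assuming only the iris data bundled with MLJ:

```julia
using MLJ

PCA = @load PCA pkg=MultivariateStats

X, _ = @load_iris
mach = machine(PCA(), X) |> fit!
r = report(mach)

# fraction of total variance retained by the principal components:
r.tprincipalvar / r.tvar
```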
An AbstractVector of length `outdim`\n * `loadings`: The models loadings, weights for each variable used when calculating principal components. A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nPCA = @load PCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" -":name" = "PCA" -":human_name" = "pca" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] -":hyperparameters" = "`(:maxoutdim, :method, :variance_ratio, :mean)`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Float64\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[DecisionTree.AdaBoostStumpClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" +[MLJBalancing.BalancedBaggingClassifier] +":is_wrapper" = "`true`" +":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Integer\", \"Union{Integer, Random.AbstractRNG}\")`" +":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "unknown" +":prediction_type" = ":probabilistic" +":load_path" = "MLJBalancing.BalancedBaggingClassifier" +":hyperparameters" = "`(:model, :T, :rng)`" ":is_pure_julia" = "`true`" -":package_name" = "DecisionTree" -":package_license" = "MIT" -":load_path" = 
"MLJDecisionTreeInterface.AdaBoostStumpClassifier" -":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" -":package_url" = "https://github.com/bensadeghi/DecisionTree.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nAdaBoostStumpClassifier\n```\n\nA model type for constructing a Ada-boosted stump classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nAdaBoostStumpClassifier = @load AdaBoostStumpClassifier pkg=DecisionTree\n```\n\nDo `model = AdaBoostStumpClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `AdaBoostStumpClassifier(n_iter=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere:\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `n_iter=10`: number of iterations of AdaBoost\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted Parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `stumps`: the `Ensemble` object returned by the core DecisionTree.jl algorithm.\n * `coefficients`: the stump coefficients (one per stump)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features`: the names of the features encountered in training\n\n# Accessor functions\n\n * `feature_importances(mach)` returns a vector of `(feature::Symbol => importance)` pairs; the type of importance is determined by the hyperparameter `feature_importance` (see above)\n\n# Examples\n\n```\nusing MLJ\nBooster = @load AdaBoostStumpClassifier pkg=DecisionTree\nbooster = Booster(n_iter=15)\n\nX, y = @load_iris\nmach = machine(booster, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"verginica\" class\n\nfitted_params(mach).stumps # raw `Ensemble` object from DecisionTree.jl\nfitted_params(mach).coefs # coefficient associated with each stump\nfeature_importances(mach)\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.AdaBoostStumpClassifier`](@ref).\n""" -":name" = "AdaBoostStumpClassifier" -":human_name" = "Ada-boosted stump classifier" +":human_name" = "balanced bagging classifier" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":feature_importances"] -":hyperparameters" = "`(:n_iter, :feature_importance, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[DecisionTree.DecisionTreeRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":docstring" = """```\nBalancedBaggingClassifier\n```\n\nA model type for constructing a balanced bagging classifier, based on [MLJBalancing.jl](https://github.com/JuliaAI/MLJBalancing).\n\nFrom MLJ, the type can be imported using\n\n`BalancedBaggingClassifier = @load BalancedBaggingClassifier pkg=MLJBalancing`\n\nConstruct an instance with default hyper-parameters using the syntax `bagging_model = 
BalancedBaggingClassifier(model=...)`\n\nGiven a probabilistic classifier, `BalancedBaggingClassifier` performs bagging by undersampling only the majority data in each bag, so that each bag includes as many samples as the minority data. This approach, with an AdaBoost classifier whose output scores are averaged, is proposed in the paper: Xu-Ying Liu, Jianxin Wu, & Zhi-Hua Zhou (2009). Exploratory Undersampling for Class-Imbalance Learning. IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics), 39(2), 539–550.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: input features of a form supported by the `model` being wrapped (typically a table, e.g., `DataFrame`, with `Continuous` columns will be supported, as a minimum)\n * `y`: the binary target, which can be any `AbstractVector` where `length(unique(y)) == 2`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `model::Probabilistic`: The classifier to use to train on each bag.\n * `T::Integer=0`: The number of bags to be used in the ensemble. If not given, will be set as the ratio between the frequency of the majority and minority classes. It can later be found in `report(mach)`.\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if Julia `VERSION>=1.7`. Otherwise, uses `MersenneTwister`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: return the mode of each prediction above\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# Load base classifier and BalancedBaggingClassifier\nBalancedBaggingClassifier = @load BalancedBaggingClassifier pkg=MLJBalancing\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\n# Construct the base classifier and use it to construct a BalancedBaggingClassifier\nlogistic_model = LogisticClassifier()\nmodel = BalancedBaggingClassifier(model=logistic_model, T=5)\n\n# Load the data and train the BalancedBaggingClassifier\nX, y = Imbalance.generate_imbalanced_data(100, 5; num_vals_per_category = [3, 2],\n class_probs = [0.9, 0.1],\n type = \"ColTable\",\n rng=42)\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇ 16 (19.0%)\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 84 (100.0%)\n\nmach = machine(model, X, y) |> fit!\n\n# Predict using the trained model\n\nyhat = predict(mach, X) # probabilistic predictions\npredict_mode(mach, X) # point predictions\n```\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl" +":package_name" = "MLJBalancing" +":name" = "BalancedBaggingClassifier" ":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "DecisionTree" -":package_license" = "MIT" -":load_path" = "MLJDecisionTreeInterface.DecisionTreeRegressor" -":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" -":package_url" = "https://github.com/bensadeghi/DecisionTree.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nDecisionTreeRegressor\n```\n\nA model type for constructing a CART decision tree regressor, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model 
interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDecisionTreeRegressor = @load DecisionTreeRegressor pkg=DecisionTree\n```\n\nDo `model = DecisionTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DecisionTreeRegressor(max_depth=...)`.\n\n`DecisionTreeRegressor` implements the [CART algorithm](https://en.wikipedia.org/wiki/Decision_tree_learning), originally published in Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. (1984): \"Classification and regression trees\". *Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software.*.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: max number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=0`: number of features to select at random (0 for all)\n * `post_prune=false`: set to `true` for post-fit pruning\n * `merge_purity_threshold=1.0`: (post-pruning) merge leaves having combined purity `>= merge_purity_threshold`\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree or stump object returned by the core DecisionTree.jl algorithm\n * `features`: the names of the features encountered in training\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features`: the names of the features encountered in training\n\n# Accessor functions\n\n * `feature_importances(mach)` returns a vector of `(feature::Symbol => importance)` pairs; the type of importance is determined by the hyperparameter `feature_importance` (see above)\n\n# Examples\n\n```\nusing MLJ\nDecisionTreeRegressor = @load DecisionTreeRegressor pkg=DecisionTree\nmodel = DecisionTreeRegressor(max_depth=3, min_samples_split=3)\n\nX, y = make_regression(100, 4; rng=123) # synthetic data\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2; rng=123)\nyhat = predict(mach, Xnew) # new predictions\n\njulia> fitted_params(mach).tree\nx1 < 0.2758\n├─ x2 < 0.9137\n│ ├─ x1 < -0.9582\n│ │ ├─ 0.9189256882087312 (0/12)\n│ │ └─ -0.23180616021065256 (0/38)\n│ └─ -1.6461153800037722 (0/9)\n└─ x1 < 1.062\n ├─ x2 < -0.4969\n │ ├─ -0.9330755147107384 (0/5)\n │ └─ -2.3287967825015548 (0/17)\n └─ x2 < 0.4598\n ├─ -2.931299926506291 (0/11)\n └─ -4.726518740473489 (0/8)\n\nfeature_importances(mach) # get feature importances\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.DecisionTreeRegressor`](@ref).\n""" -":name" = "DecisionTreeRegressor" 
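A minimal sketch of the post-pruning hyper-parameters listed in the docstring above (`post_prune`, `merge_purity_threshold`); the threshold value here is illustrative only:

```julia
using MLJ

DecisionTreeRegressor = @load DecisionTreeRegressor pkg=DecisionTree

X, y = make_regression(100, 4; rng=123)  # synthetic data, as in the docstring

# grow the tree fully, then merge leaves whose combined purity is >= 0.9:
model = DecisionTreeRegressor(post_prune=true, merge_purity_threshold=0.9)
mach = machine(model, X, y) |> fit!

fitted_params(mach).tree  # the (pruned) tree object
```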
-":human_name" = "CART decision tree regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":feature_importances"] -":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :post_prune, :merge_purity_threshold, :feature_importance, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" +":implemented_methods" = [] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[DecisionTree.DecisionTreeClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "DecisionTree" +":constructor" = "`MLJBalancing.BalancedBaggingClassifier`" + +[MLJBalancing.BalancedModel] +":is_wrapper" = "`true`" +":hyperparameter_types" = "`(\"Any\", \"MLJModelInterface.Probabilistic\")`" +":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":load_path" = "MLJDecisionTreeInterface.DecisionTreeClassifier" -":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" -":package_url" = "https://github.com/bensadeghi/DecisionTree.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nDecisionTreeClassifier\n```\n\nA model type for constructing a CART decision tree classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDecisionTreeClassifier = @load 
DecisionTreeClassifier pkg=DecisionTree\n```\n\nDo `model = DecisionTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DecisionTreeClassifier(max_depth=...)`.\n\n`DecisionTreeClassifier` implements the [CART algorithm](https://en.wikipedia.org/wiki/Decision_tree_learning), originally published in Breiman, Leo; Friedman, J. H.; Olshen, R. A.; Stone, C. J. (1984): \"Classification and regression trees\". *Monterey, CA: Wadsworth & Brooks/Cole Advanced Books & Software.*\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: min number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=0`: number of features to select at random (0 for all)\n * `post_prune=false`: set to `true` for post-fit pruning\n * `merge_purity_threshold=1.0`: (post-pruning) merge leaves having combined purity `>= merge_purity_threshold`\n * `display_depth=5`: max depth to show when displaying the tree\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `raw_tree`: the raw `Node`, `Leaf` or `Root` object returned by the core DecisionTree.jl algorithm\n * `tree`: a visualizable, wrapped version of `raw_tree` implementing the AbstractTrees.jl interface; see \"Examples\" below\n * `encoding`: dictionary of target classes keyed on integers used internally by DecisionTree.jl\n * `features`: the names of the features encountered in training, in an order consistent with the output of `print_tree` (see below)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes_seen`: list of target classes actually observed in training\n * `print_tree`: alternative method to print the fitted tree, with single argument the tree depth; interpretation requires internal integer-class encoding (see \"Fitted parameters\" above).\n * `features`: the names of the features encountered in training, in an order consistent with the output of `print_tree` (see below)\n\n# Accessor functions\n\n * `feature_importances(mach)` returns a vector of `(feature::Symbol => importance)` pairs; the type of importance is determined by the hyperparameter `feature_importance` (see above)\n\n# Examples\n\n```\nusing MLJ\nDecisionTreeClassifier = @load DecisionTreeClassifier pkg=DecisionTree\nmodel = DecisionTreeClassifier(max_depth=3, min_samples_split=3)\n\nX, y = @load_iris\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"virginica\" class\n\njulia> tree = fitted_params(mach).tree\npetal_length < 2.45\n├─ setosa (50/50)\n└─ petal_width < 1.75\n ├─ petal_length < 4.95\n │ ├─ versicolor (47/48)\n │ └─ virginica (4/6)\n └─ petal_length < 4.85\n ├─ virginica (2/3)\n └─ virginica (43/43)\n\nusing Plots, TreeRecipe\nplot(tree) # for a graphical representation of the tree\n\nfeature_importances(mach)\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.DecisionTreeClassifier`](@ref).\n""" -":name" = "DecisionTreeClassifier" -":human_name" = "CART decision tree classifier" -":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":feature_importances"] -":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :post_prune, :merge_purity_threshold, :display_depth, :feature_importance, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Float64\", \"Int64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":load_path" = "MLJBalancing.BalancedModel" +":hyperparameters" = "`(:balancers, :model)`" +":is_pure_julia" = "`false`" +":human_name" = "balanced model probabilistic" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" 
= "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[DecisionTree.RandomForestRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":docstring" = """```\nBalancedModel(; model=nothing, balancer1=balancer_model1, balancer2=balancer_model2, ...)\nBalancedModel(model; balancer1=balancer_model1, balancer2=balancer_model2, ...)\n```\n\nGiven a classification model, and one or more balancer models that all implement the `MLJModelInterface`, `BalancedModel` constructs a sequential pipeline that wraps an arbitrary number of balancing models and a classifier together.\n\n# Operation\n\n * During training, data is first passed to `balancer1` and the result is passed to `balancer2` and so on; the result from the final balancer is then passed to the classifier for training.\n * During prediction, the balancers have no effect.\n\n# Arguments\n\n * `model::Supervised`: A classification model that implements the `MLJModelInterface`.\n * `balancer1::Static=...`: The first balancer model to pass the data to. This keyword argument can have any name.\n * `balancer2::Static=...`: The second balancer model to pass the data to. 
This keyword argument can have any name.\n * and so on for an arbitrary number of balancers.\n\n# Returns\n\n * An instance of type `ProbabilisticBalancedModel` or `DeterministicBalancedModel`, depending on the prediction type of `model`.\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# generate data\nX, y = Imbalance.generate_imbalanced_data(1000, 5; class_probs=[0.2, 0.3, 0.5])\n\n# prepare classification and balancing models\nSMOTENC = @load SMOTENC pkg=Imbalance verbosity=0\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance verbosity=0\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\noversampler = SMOTENC(k=5, ratios=1.0, rng=42)\nundersampler = TomekUndersampler(min_ratios=0.5, rng=42)\nlogistic_model = LogisticClassifier()\n\n# wrap them in a BalancedModel\nbalanced_model = BalancedModel(model=logistic_model, balancer1=oversampler, balancer2=undersampler)\n\n# now this behaves as a unified model that can be trained, validated, fine-tuned, etc.\nmach = machine(balanced_model, X, y)\nfit!(mach)\n```\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl" +":package_name" = "MLJBalancing" +":name" = "BalancedModel" ":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "DecisionTree" -":package_license" = "MIT" -":load_path" = "MLJDecisionTreeInterface.RandomForestRegressor" -":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" -":package_url" = "https://github.com/bensadeghi/DecisionTree.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nRandomForestRegressor\n```\n\nA model type for constructing a CART random forest regressor, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomForestRegressor = @load RandomForestRegressor pkg=DecisionTree\n```\n\nDo `model = RandomForestRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomForestRegressor(max_depth=...)`.\n\n`RandomForestRegressor` implements the standard [Random Forest algorithm](https://en.wikipedia.org/wiki/Random_forest), originally published in Breiman, L. (2001): \"Random Forests.\", *Machine Learning*, vol. 45, pp. 
5–32\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: min number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=-1`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `n_trees=10`: number of trees to train\n * `sampling_fraction=0.7`: fraction of samples to train each tree on\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `forest`: the `Ensemble` object returned by the core DecisionTree.jl algorithm\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features`: the names of the features encountered in training\n\n# Accessor functions\n\n * `feature_importances(mach)` returns a vector of `(feature::Symbol => importance)` pairs; the type of importance is determined by the hyperparameter `feature_importance` (see above)\n\n# Examples\n\n```\nusing MLJ\nForest = @load RandomForestRegressor pkg=DecisionTree\nforest = Forest(max_depth=4, min_samples_split=3)\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(forest, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n\nfitted_params(mach).forest # raw `Ensemble` object from DecisionTree.jl\nfeature_importances(mach)\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.RandomForestRegressor`](@ref).\n""" -":name" = "RandomForestRegressor" -":human_name" = "CART random forest regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":update", ":feature_importances"] -":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :n_trees, :sampling_fraction, :feature_importance, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":n_trees" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" +":implemented_methods" = [":getproperty", ":propertynames", ":setproperty!"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":predict_scitype" = 
"`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`BalancedModel`" -[DecisionTree.RandomForestClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "DecisionTree" -":package_license" = "MIT" -":load_path" = "MLJDecisionTreeInterface.RandomForestClassifier" -":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" -":package_url" = "https://github.com/bensadeghi/DecisionTree.jl" +[Imbalance.RandomOversampler] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nRandomForestClassifier\n```\n\nA model type for constructing a CART random forest classifier, based on [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomForestClassifier = @load RandomForestClassifier pkg=DecisionTree\n```\n\nDo `model = RandomForestClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomForestClassifier(max_depth=...)`.\n\n`RandomForestClassifier` implements the standard [Random Forest algorithm](https://en.wikipedia.org/wiki/Random_forest), originally published in Breiman, L. (2001): \"Random Forests.\", *Machine Learning*, vol. 45, pp. 
5–32.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `max_depth=-1`: max depth of the decision tree (-1=any)\n * `min_samples_leaf=1`: min number of samples each leaf needs to have\n * `min_samples_split=2`: min number of samples needed for a split\n * `min_purity_increase=0`: min purity needed for a split\n * `n_subfeatures=-1`: number of features to select at random (0 for all, -1 for square root of number of features)\n * `n_trees=10`: number of trees to train\n * `sampling_fraction=0.7`: fraction of samples to train each tree on\n * `feature_importance`: method to use for computing feature importances. One of `(:impurity, :split)`\n * `rng=Random.GLOBAL_RNG`: random number generator or seed\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `forest`: the `Ensemble` object returned by the core DecisionTree.jl algorithm\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features`: the names of the features encountered in training\n\n# Accessor functions\n\n * `feature_importances(mach)` returns a vector of `(feature::Symbol => importance)` pairs; the type of importance is determined by the hyperparameter `feature_importance` (see above)\n\n# Examples\n\n```\nusing MLJ\nForest = @load RandomForestClassifier pkg=DecisionTree\nforest = Forest(min_samples_split=6, n_subfeatures=3)\n\nX, y = @load_iris\nmach = machine(forest, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\npdf.(yhat, \"virginica\") # probabilities for the \"virginica\" class\n\nfitted_params(mach).forest # raw `Ensemble` object from DecisionTree.jl\n\nfeature_importances(mach) # `:impurity` feature importances\nforest.feature_importance = :split\nfeature_importances(mach) # `:split` feature importances\n\n```\n\nSee also [DecisionTree.jl](https://github.com/bensadeghi/DecisionTree.jl) and the unwrapped model type [`MLJDecisionTreeInterface.DecisionTree.RandomForestClassifier`](@ref).\n""" -":name" = "RandomForestClassifier" -":human_name" = "CART random forest classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":update", ":feature_importances"] -":hyperparameters" = "`(:max_depth, :min_samples_leaf, :min_samples_split, :min_purity_increase, :n_subfeatures, :n_trees, :sampling_fraction, :feature_importance, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":n_trees" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJBalancing.BalancedBaggingClassifier] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJBalancing" -":package_license" = "unknown" -":load_path" = "MLJBalancing.BalancedBaggingClassifier" -":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" -":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nBalancedBaggingClassifier\n```\n\nA model type for constructing a balanced bagging classifier, based on [MLJBalancing.jl](https://github.com/JuliaAI/MLJBalancing).\n\nFrom MLJ, the type can be imported using\n\n`BalancedBaggingClassifier = @load BalancedBaggingClassifier pkg=MLJBalancing`\n\nConstruct an instance with default hyper-parameters using the syntax `bagging_model = BalancedBaggingClassifier(model=...)`.\n\nGiven a probabilistic classifier, `BalancedBaggingClassifier` performs bagging by undersampling only the majority data in each bag, so that each bag includes as many samples as the minority data. This scheme, with an AdaBoost classifier whose output scores are averaged, was proposed in Xu-Ying Liu, Jianxin Wu, & Zhi-Hua Zhou. (2009). Exploratory Undersampling for Class-Imbalance Learning. IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics), 39 (2), 539–550.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: input features of a form supported by the `model` being wrapped (typically a table, e.g., `DataFrame`, with `Continuous` columns will be supported, as a minimum)\n * `y`: the binary target, which can be any `AbstractVector` where `length(unique(y)) == 2`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `model::Probabilistic`: The classifier to use to train on each bag.\n * `T::Integer=0`: The number of bags to be used in the ensemble. If not given, will be set as the ratio between the frequency of the majority and minority classes. Can be later found in `report(mach)`.\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if Julia `VERSION>=1.7`. Otherwise, uses `MersenneTwister`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given\n\nfeatures `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n\n * `predict_mode(mach, Xnew)`: return the mode of each prediction above\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# Load base classifier and BalancedBaggingClassifier\nBalancedBaggingClassifier = @load BalancedBaggingClassifier pkg=MLJBalancing\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\n# Construct the base classifier and use it to construct a BalancedBaggingClassifier\nlogistic_model = LogisticClassifier()\nmodel = BalancedBaggingClassifier(model=logistic_model, T=5)\n\n# Load the data and train the BalancedBaggingClassifier\nX, y = Imbalance.generate_imbalanced_data(100, 5; num_vals_per_category = [3, 2],\n class_probs = [0.9, 0.1],\n type = \"ColTable\",\n rng=42)\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇ 16 (19.0%)\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 84 (100.0%)\n\nmach = machine(model, X, y) |> fit!\n\n# Predict using the trained model\n\nyhat = predict(mach, X) # probabilistic predictions\npredict_mode(mach, X) # point predictions\n```\n""" -":name" = "BalancedBaggingClassifier" -":human_name" = "balanced bagging classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :T, :rng)`" -":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Integer\", \"Union{Integer, Random.AbstractRNG}\")`" +":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" +":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`MLJBalancing.BalancedBaggingClassifier`" - -[MLJBalancing.BalancedModel] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJBalancing" -":package_license" = "MIT" -":load_path" = "MLJBalancing.BalancedModel" -":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" -":package_url" = "https://github.com/JuliaAI/MLJBalancing.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nBalancedModel(; model=nothing, balancer1=balancer_model1, balancer2=balancer_model2, ...)\nBalancedModel(model; balancer1=balancer_model1, balancer2=balancer_model2, ...)\n```\n\nGiven a classification model, and one or more balancer models that all implement the `MLJModelInterface`, `BalancedModel` constructs a sequential pipeline that wraps an arbitrary number of balancing models and a classifier together.\n\n# Operation\n\n * During training, data is first passed to `balancer1` and the result is passed to `balancer2` and so on; the result from the final 
balancer is then passed to the classifier for training.\n * During prediction, the balancers have no effect.\n\n# Arguments\n\n * `model::Supervised`: A classification model that implements the `MLJModelInterface`.\n * `balancer1::Static=...`: The first balancer model to pass the data to. This keyword argument can have any name.\n * `balancer2::Static=...`: The second balancer model to pass the data to. This keyword argument can have any name.\n * and so on for an arbitrary number of balancers.\n\n# Returns\n\n * An instance of type `ProbabilisticBalancedModel` or `DeterministicBalancedModel`, depending on the prediction type of `model`.\n\n# Example\n\n```julia\nusing MLJ\nusing Imbalance\n\n# generate data\nX, y = Imbalance.generate_imbalanced_data(1000, 5; class_probs=[0.2, 0.3, 0.5])\n\n# prepare classification and balancing models\nSMOTENC = @load SMOTENC pkg=Imbalance verbosity=0\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance verbosity=0\nLogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0\n\noversampler = SMOTENC(k=5, ratios=1.0, rng=42)\nundersampler = TomekUndersampler(min_ratios=0.5, rng=42)\nlogistic_model = LogisticClassifier()\n\n# wrap them in a BalancedModel\nbalanced_model = BalancedModel(model=logistic_model, balancer1=oversampler, balancer2=undersampler)\n\n# now this behaves as a unified model that can be trained, validated, fine-tuned, etc.\nmach = machine(balanced_model, X, y)\nfit!(mach)\n```\n""" -":name" = "BalancedModel" -":human_name" = "balanced model probabilistic" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":getproperty", ":propertynames", ":setproperty!"] -":hyperparameters" = "`(:balancers, :model)`" -":hyperparameter_types" = "`(\"Any\", \"MLJModelInterface.Probabilistic\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict, :predict_mean, :predict_mode, :predict_median, :predict_joint, :transform, :inverse_transform)`" -":constructor" = "`BalancedModel`" - -[Imbalance.RandomOversampler] -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Imbalance" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":tags" = [] 
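Note the `":fit_data_scitype" = "`Tuple{}`"` trait recorded just above: the Imbalance resamplers are *static* transformers, so `fit` consumes no data and `transform` acts directly on `(X, y)`. As a rough sketch of how the traits stored in this file surface to users (this assumes the `info` and `doc` accessors that MLJ re-exports from MLJModels; field names mirror the keys above, and exact values depend on the registry version):

```julia
using MLJ

# Look up the registry traits for a model without loading Imbalance itself;
# `info` returns the trait values recorded in Metadata.toml.
meta = info("RandomOversampler", pkg="Imbalance")

meta.is_supervised   # false -- a static transformer, per the entry above
meta.human_name      # "random oversampler"
meta.package_url     # "https://github.com/JuliaAI/Imbalance.jl"

# The string stored under ":docstring" is what `doc` renders:
doc("RandomOversampler", pkg="Imbalance")
```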
+":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" +":prediction_type" = ":unknown" ":load_path" = "Imbalance.MLJ.RandomOversampler" -":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameters" = "`(:ratios, :rng, :try_preserve_type)`" +":is_pure_julia" = "`true`" +":human_name" = "random oversampler" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """Initiate a random oversampling model with the given hyper-parameters.\n\n```\nRandomOversampler\n```\n\nA model type for constructing a random oversampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomOversampler = @load RandomOversampler pkg=Imbalance\n```\n\nDo `model = RandomOversampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomOversampler(ratios=...)`.\n\n`RandomOversampler` implements naive oversampling by repeating existing observations with replacement.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by `mach = machine(model)`\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, the model can be constructed by `model = RandomOversampler()`\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses `MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of real numbers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table (depending on whether the input `X` is a matrix or a table, respectively) that includes the original data and the new observations due to oversampling\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomOversampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n# load RandomOversampler\nRandomOversampler = @load RandomOversampler pkg=Imbalance\n\n# wrap the model in a machine\noversampler = RandomOversampler(ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n""" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "Imbalance" +":name" = "RandomOversampler" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """Initiate a random oversampling model with the given hyper-parameters.\n\n```\nRandomOversampler\n```\n\nA model type for constructing a random oversampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomOversampler = @load RandomOversampler pkg=Imbalance\n```\n\nDo `model = RandomOversampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomOversampler(ratios=...)`.\n\n`RandomOversampler` implements naive oversampling by repeating existing observations with replacement.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by `mach = machine(model)`\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, the model can be constructed by `model = RandomOversampler()`\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. 
Otherwise, uses `MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of real numbers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table (depending on whether the input `X` is a matrix or a table, respectively) that includes the original data and the new observations due to oversampling\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomOversampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n# load RandomOversampler\nRandomOversampler = @load RandomOversampler pkg=Imbalance\n\n# wrap the model in a machine\noversampler = RandomOversampler(ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n""" -":name" = "RandomOversampler" -":human_name" = "random oversampler" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" ":implemented_methods" = [":transform_scitype", ":transform"] -":hyperparameters" = "`(:ratios, :rng, :try_preserve_type)`" -":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":constructor" = "`nothing`" [Imbalance.SMOTENC] -":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" -":output_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Integer\", \"Any\", \"AbstractString\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" +":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" -":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Imbalance" +":output_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" +":prediction_type" = ":unknown" ":load_path" = "Imbalance.MLJ.SMOTENC" -":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameters" = "`(:k, :ratios, :knn_tree, :rng, :try_preserve_type)`" +":is_pure_julia" = "`true`" +":human_name" = "smotenc" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """Initiate a SMOTENC model with the given hyper-parameters.\n\n```\nSMOTENC\n```\n\nA model type for constructing a smotenc, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTENC = @load SMOTENC pkg=Imbalance\n```\n\nDo `model = SMOTENC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SMOTENC(k=...)`.\n\n`SMOTENC` implements the SMOTENC algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O. Hall, W. P. Kegelmeyer, “SMOTE: synthetic minority over-sampling technique,” Journal of Artificial Intelligence Research, 16, 321–357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, the model can be constructed by\n\n```\nmodel = SMOTENC()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTENC algorithm. Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `knn_tree`: Decides the tree used in KNN computations. Either `\"Brute\"` or `\"Ball\"`. BallTree can be much faster but may lead to inaccurate results.\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses `MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table (depending on whether the input `X` is a matrix or a table, respectively) that includes the original data and the new observations due to oversampling\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTENC, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 3\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\njulia> ScientificTypes.schema(X).scitypes\n(Continuous, Continuous, Continuous, Continuous, Continuous)\n# coerce nominal columns to a finite scitype (multiclass or ordered factor)\nX = coerce(X, :Column4=>Multiclass, :Column5=>Multiclass)\n\n# load SMOTE-NC\nSMOTENC = @load SMOTENC pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTENC(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n""" +":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "Imbalance" +":name" = "SMOTENC" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """Initiate a SMOTENC model with the given hyper-parameters.\n\n```\nSMOTENC\n```\n\nA model type for constructing a smotenc, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTENC = @load SMOTENC pkg=Imbalance\n```\n\nDo `model = SMOTENC()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `SMOTENC(k=...)`.\n\n`SMOTENC` implements the SMOTENC algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O. Hall, W. P. Kegelmeyer, “SMOTE: synthetic minority over-sampling technique,” Journal of Artificial Intelligence Research, 16, 321–357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, the model can be constructed by\n\n```\nmodel = SMOTENC()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTENC algorithm. Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `knn_tree`: Decides the tree used in KNN computations. Either `\"Brute\"` or `\"Ball\"`. BallTree can be much faster but may lead to inaccurate results.\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses `MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table (depending on whether the input `X` is a matrix or a table, respectively) that includes the original data and the new observations due to oversampling\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTENC, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 3\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\njulia> ScientificTypes.schema(X).scitypes\n(Continuous, Continuous, Continuous, Continuous, Continuous)\n# coerce nominal columns to a finite scitype (multiclass or ordered factor)\nX = coerce(X, :Column4=>Multiclass, :Column5=>Multiclass)\n\n# load SMOTE-NC\nSMOTENC = @load SMOTENC pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTENC(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n""" -":name" = "SMOTENC" -":human_name" = "smotenc" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" ":implemented_methods" = [":transform_scitype", ":clean!", ":transform"] -":hyperparameters" = "`(:k, :ratios, :knn_tree, :rng, :try_preserve_type)`" -":hyperparameter_types" = "`(\"Integer\", \"Any\", \"AbstractString\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" +":transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" ":constructor" = "`nothing`" [Imbalance.TomekUndersampler] -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" +":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Imbalance" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" +":prediction_type" = ":unknown" ":load_path" = "Imbalance.MLJ.TomekUndersampler" -":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameters" = "`(:min_ratios, :force_min_ratios, :rng, :try_preserve_type)`" +":is_pure_julia" = "`true`" +":human_name" = "tomek undersampler" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """Initiate a Tomek undersampling model with the given hyper-parameters.\n\n```\nTomekUndersampler\n```\n\nA model type for constructing a tomek undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance\n```\n\nDo `model = TomekUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TomekUndersampler(min_ratios=...)`.\n\n`TomekUndersampler` undersamples by removing any point that is part of a Tomek link in the data, as defined in Ivan Tomek. Two modifications of CNN. IEEE Trans. Systems, Man and Cybernetics, 6:769–772, 1976.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by `mach = machine(model)`\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, the model can be constructed by `model = TomekUndersampler()`\n\n# Hyperparameters\n\n * `min_ratios=1.0`: A parameter that controls the maximum amount of undersampling to be done for each class. If this algorithm cleans the data to an extent that this is violated, some of the cleaned points will be revived randomly so that it is satisfied.\n\n * Can be a float and in this case each class will be at most undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float minimum ratio for that class\n\n * `force_min_ratios=false`: If `true`, and this algorithm cleans the data such that the ratios for each class exceed those specified in `min_ratios`, then further undersampling will be performed so that the final ratios are equal to `min_ratios`.\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses `MersenneTwister`.\n\n * `try_preserve_type::Bool=true`: When `true`, the function will try to not change the type of the input table (e.g., `DataFrame`). However, for some tables, this may not succeed, and in this case, the table returned will be a column table (named-tuple of vectors). This parameter is ignored if the input is a matrix.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table (depending on whether the input `X` is a matrix or a table, respectively) that includes the data after undersampling\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using TomekUndersampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n min_sep=0.01, stds=[3.0 3.0 3.0], class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y; ref=\"minority\")\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load TomekUndersampler model type:\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance\n\n# Undersample the majority classes to sizes relative to the minority class:\ntomek_undersampler = TomekUndersampler(min_ratios=1.0, rng=42)\nmach = machine(tomek_undersampler)\nX_under, y_under = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 22 (115.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 36 (189.5%)\n```\n""" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "Imbalance" +":name" = "TomekUndersampler" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """Initiate a Tomek undersampling model with the given hyper-parameters.\n\n```\nTomekUndersampler\n```\n\nA model type for constructing a tomek undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance\n```\n\nDo `model = TomekUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TomekUndersampler(min_ratios=...)`.\n\n`TomekUndersampler` undersamples by removing any point that is part of a Tomek link in the data, as defined in Ivan Tomek. Two modifications of CNN. IEEE Trans. 
Systems, Man and Cybernetics, 6:769–772, 1976.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed by model = TomekUndersampler()\n\n# Hyperparameters\n\n * `min_ratios=1.0`: A parameter that controls the maximum amount of undersampling to be done for each class. If this algorithm cleans the data to an extent that this is violated, some of the cleaned points will be revived randomly so that it is satisfied.\n\n * Can be a float and in this case each class will be at most undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float minimum ratio for that class\n\n * `force_min_ratios=false`: If `true`, and this algorithm cleans the data such that the ratios for each class exceed those specified in `min_ratios` then further undersampling will be perform so that the final ratios are equal to `min_ratios`.\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n * `try_preserve_type::Bool=true`: When `true`, the function will try to not change the type of the input table (e.g., `DataFrame`). However, for some tables, this may not succeed, and in this case, the table returned will be a column table (named-tuple of vectors). This parameter is ignored if the input is a matrix.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling depending on whether the input `X` is a matrix or table respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using TomekUndersampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n min_sep=0.01, stds=[3.0 3.0 3.0], class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y; ref=\"minority\")\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load TomekUndersampler model type:\nTomekUndersampler = @load TomekUndersampler pkg=Imbalance\n\n# Underample the majority classes to sizes relative to the minority class:\ntomek_undersampler = TomekUndersampler(min_ratios=1.0, rng=42)\nmach = machine(tomek_undersampler)\nX_under, y_under = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 22 (115.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 36 (189.5%)\n```\n""" -":name" = "TomekUndersampler" -":human_name" = "tomek undersampler" -":is_supervised" = "`false`" 
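Aside: the TomekUndersampler docstring above removes a point when it participates in a Tomek link, i.e. when it and a point of a different class are each other's nearest neighbor. A minimal brute-force sketch of that detection step (illustration only; not Imbalance.jl's implementation, which additionally handles `min_ratios`, table inputs, and point revival):

```julia
# Brute-force Tomek-link detection (O(n^2)); illustration only.
using LinearAlgebra

# index of the Euclidean nearest neighbor of row i of X
function nearest_neighbor(X::AbstractMatrix, i::Int)
    best, bestdist = 0, Inf
    for j in axes(X, 1)
        j == i && continue
        d = norm(@view(X[i, :]) .- @view(X[j, :]))
        if d < bestdist
            best, bestdist = j, d
        end
    end
    return best
end

# points in a Tomek link: mutual nearest neighbors with different labels
function tomek_link_members(X::AbstractMatrix, y::AbstractVector)
    nn = [nearest_neighbor(X, i) for i in axes(X, 1)]
    return [i for i in axes(X, 1) if nn[nn[i]] == i && y[i] != y[nn[i]]]
end

X = [0.0 0.0; 0.1 0.0; 5.0 5.0]  # rows 1 and 2 are close, row 3 is far
y = ["a", "b", "a"]
tomek_link_members(X, y)          # [1, 2]: a cross-class mutual-NN pair
```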
-":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" ":implemented_methods" = [":transform_scitype", ":transform"] -":hyperparameters" = "`(:min_ratios, :force_min_ratios, :rng, :try_preserve_type)`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":constructor" = "`nothing`" [Imbalance.ClusterUndersampler] -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"AbstractString\", \"Any\", \"Integer\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" +":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Imbalance" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" +":prediction_type" = ":unknown" ":load_path" = "Imbalance.MLJ.ClusterUndersampler" -":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameters" = "`(:mode, :ratios, :maxiter, :rng, :try_preserve_type)`" +":is_pure_julia" = "`true`" +":human_name" = "cluster undersampler" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """Initiate a cluster undersampling model with the given hyper-parameters.\n\n```\nClusterUndersampler\n```\n\nA model type for constructing a cluster undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nClusterUndersampler = @load ClusterUndersampler pkg=Imbalance\n```\n\nDo `model = ClusterUndersampler()` to 
construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ClusterUndersampler(mode=...)`.\n\n`ClusterUndersampler` implements clustering undersampling as presented in Wei-Chao, L., Chih-Fong, T., Ya-Han, H., & Jing-Shang, J. (2017). Clustering-based undersampling in class-imbalanced data. Information Sciences, 409–410, 17–26, with K-means as the clustering algorithm.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by \tmach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed with `model = ClusterUndersampler()`.\n\n# Hyperparameters\n\n * `mode::AbstractString=\"nearest\"`: If `\"center\"` then the undersampled data will consist of the centroids of\n\n```\neach cluster found; if `\"nearest\"` then it will consist of the nearest neighbor of each centroid.\n```\n\n * `ratios=1.0`: A parameter that controls the amount of undersampling to be done for each class\n\n * Can be a float and in this case each class will be undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `maxiter::Integer=100`: Maximum number of iterations to run K-means\n * `rng::Integer=42`: Random number generator seed. Must be an integer.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling depending on whether the input `X` is a matrix or table respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using ClusterUndersampler, returning the undersampled versions\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n \njulia> Imbalance.checkbalance(y; ref=\"minority\")\n 1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n 2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n 0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load cluster_undersampling\nClusterUndersampler = @load ClusterUndersampler pkg=Imbalance\n\n# wrap the model in a machine\nundersampler = ClusterUndersampler(mode=\"nearest\", \n ratios=Dict(0=>1.0, 1=> 1.0, 2=>1.0), rng=42)\nmach = machine(undersampler)\n\n# provide the data to transform (there is nothing to fit)\nX_under, y_under = transform(mach, X, y)\n\n \njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%)\n```\n"""
+":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`"
 ":package_url" = 
"https://github.com/JuliaAI/Imbalance.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "Imbalance" +":name" = "ClusterUndersampler" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """Initiate a cluster undersampling model with the given hyper-parameters.\n\n```\nClusterUndersampler\n```\n\nA model type for constructing a cluster undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nClusterUndersampler = @load ClusterUndersampler pkg=Imbalance\n```\n\nDo `model = ClusterUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ClusterUndersampler(mode=...)`.\n\n`ClusterUndersampler` implements clustering undersampling as presented in Wei-Chao, L., Chih-Fong, T., Ya-Han, H., & Jing-Shang, J. (2017). Clustering-based undersampling in class-imbalanced data. Information Sciences, 409–410, 17–26. with K-means as the clustering algorithm.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by \tmach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed with `model = ClusterUndersampler()`.\n\n# Hyperparameters\n\n * `mode::AbstractString=\"nearest`: If `\"center\"` then the undersampled data will consist of the centriods of\n\n```\neach cluster found; if `\"nearest\"` then it will consist of the nearest neighbor of each centroid.\n```\n\n * `ratios=1.0`: A parameter that controls the amount of undersampling to be done for each class\n\n * Can be a float and in this case each class will be undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `maxiter::Integer=100`: Maximum number of iterations to run K-means\n * `rng::Integer=42`: Random number generator seed. 
Must be an integer.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling depending on whether the input `X` is a matrix or table respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using ClusterUndersampler, returning the undersampled versions\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n \njulia> Imbalance.checkbalance(y; ref=\"minority\")\n 1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n 2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n 0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load cluster_undersampling\nClusterUndersampler = @load ClusterUndersampler pkg=Imbalance\n\n# wrap the model in a machine\nundersampler = ClusterUndersampler(mode=\"nearest\", \n ratios=Dict(0=>1.0, 1=> 1.0, 2=>1.0), rng=42)\nmach = machine(undersampler)\n\n# provide the data to transform (there is nothing to fit)\nX_under, y_under = transform(mach, X, y)\n\n \njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%)\n```\n""" -":name" = "ClusterUndersampler" -":human_name" = "cluster undersampler" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" ":implemented_methods" = [":transform_scitype", ":transform"] -":hyperparameters" = "`(:mode, :ratios, :maxiter, :rng, :try_preserve_type)`" -":hyperparameter_types" = "`(\"AbstractString\", \"Any\", \"Integer\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":constructor" = "`nothing`" [Imbalance.SMOTE] -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" +":hyperparameter_types" = 
"`(\"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" +":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Imbalance" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" +":prediction_type" = ":unknown" ":load_path" = "Imbalance.MLJ.SMOTE" -":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameters" = "`(:k, :ratios, :rng, :try_preserve_type)`" +":is_pure_julia" = "`true`" +":human_name" = "smote" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """Initiate a SMOTE model with the given hyper-parameters.\n\n```\nSMOTE\n```\n\nA model type for constructing a smote, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTE = @load SMOTE pkg=Imbalance\n```\n\nDo `model = SMOTE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SMOTE(k=...)`.\n\n`SMOTE` implements the SMOTE algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O.Hall, W. P. Kegelmeyer, “SMOTE: synthetic minority over-sampling technique,” Journal of artificial intelligence research, 321-357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = SMOTE()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTE algorithm. Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. 
Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTE, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n# load SMOTE\nSMOTE = @load SMOTE pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTE(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n```\n""" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "Imbalance" +":name" = "SMOTE" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """Initiate a SMOTE model with the given hyper-parameters.\n\n```\nSMOTE\n```\n\nA model type for constructing a smote, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTE = @load SMOTE pkg=Imbalance\n```\n\nDo `model = SMOTE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SMOTE(k=...)`.\n\n`SMOTE` implements the SMOTE algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O.Hall, W. P. Kegelmeyer, “SMOTE: synthetic minority over-sampling technique,” Journal of artificial intelligence research, 321-357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = SMOTE()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTE algorithm. 
Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTE, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n# load SMOTE\nSMOTE = @load SMOTE pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTE(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n```\n""" -":name" = "SMOTE" -":human_name" = "smote" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" ":implemented_methods" = [":transform_scitype", ":clean!", ":transform"] -":hyperparameters" = "`(:k, :ratios, :rng, :try_preserve_type)`" -":hyperparameter_types" = "`(\"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":constructor" 
= "`nothing`" [Imbalance.SMOTEN] -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" +":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Imbalance" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" +":prediction_type" = ":unknown" ":load_path" = "Imbalance.MLJ.SMOTEN" -":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameters" = "`(:k, :ratios, :rng, :try_preserve_type)`" +":is_pure_julia" = "`true`" +":human_name" = "smoten" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """Initiate a SMOTEN model with the given hyper-parameters.\n\n```\nSMOTEN\n```\n\nA model type for constructing a smoten, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTEN = @load SMOTEN pkg=Imbalance\n```\n\nDo `model = SMOTEN()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SMOTEN(k=...)`.\n\n`SMOTEN` implements the SMOTEN algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O.Hall, W. P. Kegelmeyer, “SMOTEN: synthetic minority over-sampling technique,” Journal of artificial intelligence research, 321-357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = SMOTEN()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTEN algorithm. Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. 
By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of integers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Finite`. That is, for table inputs each column should have either `OrderedFactor` or `Multiclass` as the element [scitype](https://juliaai.github.io/ScientificTypes.jl/).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTEN, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 0\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\njulia> ScientificTypes.schema(X).scitypes\n(Count, Count)\n\n# coerce to a finite scitype (multiclass or ordered factor)\nX = coerce(X, autotype(X, :few_to_finite))\n\n# load SMOTEN\nSMOTEN = @load SMOTEN pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTEN(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n""" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "Imbalance" +":name" = "SMOTEN" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """Initiate a SMOTEN model with the given hyper-parameters.\n\n```\nSMOTEN\n```\n\nA model type for constructing a smoten, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTEN = @load SMOTEN pkg=Imbalance\n```\n\nDo `model = SMOTEN()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `SMOTEN(k=...)`.\n\n`SMOTEN` implements the SMOTEN algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O.Hall, W. P. Kegelmeyer, “SMOTEN: synthetic minority over-sampling technique,” Journal of artificial intelligence research, 321-357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = SMOTEN()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTEN algorithm. Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of integers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Finite`. That is, for table inputs each column should have either `OrderedFactor` or `Multiclass` as the element [scitype](https://juliaai.github.io/ScientificTypes.jl/).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. 
depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTEN, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 0\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\njulia> ScientificTypes.schema(X).scitypes\n(Count, Count)\n\n# coerce to a finite scitype (multiclass or ordered factor)\nX = coerce(X, autotype(X, :few_to_finite))\n\n# load SMOTEN\nSMOTEN = @load SMOTEN pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTEN(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n""" -":name" = "SMOTEN" -":human_name" = "smoten" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" ":implemented_methods" = [":transform_scitype", ":clean!", ":transform"] -":hyperparameters" = "`(:k, :ratios, :rng, :try_preserve_type)`" -":hyperparameter_types" = "`(\"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" ":constructor" = "`nothing`" [Imbalance.ROSE] -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"AbstractFloat\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" +":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" 
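Aside: the SMOTE and SMOTEN entries above (and ROSE below) all generate synthetic minority-class points. SMOTE's core step is linear interpolation between a minority observation and one of its `k` nearest same-class neighbors, with a uniform random coefficient. A minimal sketch of just that step, assuming the neighbor has already been selected (not the package's code; neighbor search, `ratios`, and table handling are what Imbalance.jl adds on top):

```julia
# Core SMOTE generation step: a synthetic point drawn uniformly on the
# segment between a minority point x and a same-class neighbor x_nn.
using Random

smote_point(rng::AbstractRNG, x::AbstractVector, x_nn::AbstractVector) =
    x .+ rand(rng) .* (x_nn .- x)

rng = Xoshiro(42)                  # integer seeds in the docstrings are used with Xoshiro
x, x_nn = [1.0, 2.0], [3.0, 2.5]
smote_point(rng, x, x_nn)          # lies somewhere on the segment from x to x_nn
```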
+":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Imbalance" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" +":prediction_type" = ":unknown" ":load_path" = "Imbalance.MLJ.ROSE" -":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameters" = "`(:s, :ratios, :rng, :try_preserve_type)`" +":is_pure_julia" = "`true`" +":human_name" = "rose" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """Initiate a ROSE model with the given hyper-parameters.\n\n```\nROSE\n```\n\nA model type for constructing a rose, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nROSE = @load ROSE pkg=Imbalance\n```\n\nDo `model = ROSE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ROSE(s=...)`.\n\n`ROSE` implements the ROSE (Random Oversampling Examples) algorithm to correct for class imbalance as in G Menardi, N. Torelli, “Training and assessing classification rules with imbalanced data,” Data Mining and Knowledge Discovery, 28(1), pp.92-122, 2014.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed by model = ROSE()\n\n# Hyperparameters\n\n * `s::float`: A parameter that proportionally controls the bandwidth of the Gaussian kernel\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. 
depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using ROSE, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n# load ROSE\nROSE = @load ROSE pkg=Imbalance\n\n# wrap the model in a machine\noversampler = ROSE(s=0.3, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n""" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "Imbalance" +":name" = "ROSE" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """Initiate a ROSE model with the given hyper-parameters.\n\n```\nROSE\n```\n\nA model type for constructing a rose, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nROSE = @load ROSE pkg=Imbalance\n```\n\nDo `model = ROSE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ROSE(s=...)`.\n\n`ROSE` implements the ROSE (Random Oversampling Examples) algorithm to correct for class imbalance as in G Menardi, N. Torelli, “Training and assessing classification rules with imbalanced data,” Data Mining and Knowledge Discovery, 28(1), pp.92-122, 2014.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed by model = ROSE()\n\n# Hyperparameters\n\n * `s::float`: A parameter that proportionally controls the bandwidth of the Gaussian kernel\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. 
Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using ROSE, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n# load ROSE\nROSE = @load ROSE pkg=Imbalance\n\n# wrap the model in a machine\noversampler = ROSE(s=0.3, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n""" -":name" = "ROSE" -":human_name" = "rose" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" ":implemented_methods" = [":transform_scitype", ":clean!", ":transform"] -":hyperparameters" = "`(:s, :ratios, :rng, :try_preserve_type)`" -":hyperparameter_types" = "`(\"AbstractFloat\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":constructor" = "`nothing`" [Imbalance.RandomUndersampler] -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" +":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" 
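Aside: where SMOTE interpolates between existing points, the ROSE entry above draws each synthetic point from a Gaussian kernel centred on an existing observation, with spread proportional to the hyper-parameter `s`. A rough sketch of that generation step, with the paper's data-derived per-feature bandwidth collapsed to a single stand-in constant `h` (an assumption for illustration, not the package's rule):

```julia
# Rough sketch of ROSE's generation step: Gaussian jitter around an
# existing observation. `h` stands in for the per-feature bandwidth that
# the real algorithm derives from the data (a smoothed bootstrap); only
# `s` corresponds to the hyper-parameter documented above.
using Random

rose_point(rng::AbstractRNG, x::AbstractVector, s::Real, h::Real) =
    x .+ s .* h .* randn(rng, length(x))

rng = Xoshiro(42)
rose_point(rng, [1.0, 2.0], 0.3, 1.0)   # x plus N(0, (s*h)^2) noise per feature
```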
+":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Imbalance" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" +":prediction_type" = ":unknown" ":load_path" = "Imbalance.MLJ.RandomUndersampler" -":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameters" = "`(:ratios, :rng, :try_preserve_type)`" +":is_pure_julia" = "`true`" +":human_name" = "random undersampler" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """Initiate a random undersampling model with the given hyper-parameters.\n\n```\nRandomUndersampler\n```\n\nA model type for constructing a random undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n```\n\nDo `model = RandomUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomUndersampler(ratios=...)`.\n\n`RandomUndersampler` implements naive undersampling by randomly removing existing observations. \n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed by model = RandomUndersampler()\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of undersampling to be done for each class\n\n * Can be a float and in this case each class will be undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of real numbers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. 
Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling depending on whether the input `X` is a matrix or table respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomUndersampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y; ref=\"minority\")\n 1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n 2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n 0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load RandomUndersampler\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n\n# wrap the model in a machine\nundersampler = RandomUndersampler(ratios=Dict(0=>1.0, 1=> 1.0, 2=>1.0), \n rng=42)\nmach = machine(undersampler)\n\n# provide the data to transform (there is nothing to fit)\nX_under, y_under = transform(mach, X, y)\n \njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n```\n""" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "Imbalance" +":name" = "RandomUndersampler" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """Initiate a random undersampling model with the given hyper-parameters.\n\n```\nRandomUndersampler\n```\n\nA model type for constructing a random undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n```\n\nDo `model = RandomUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomUndersampler(ratios=...)`.\n\n`RandomUndersampler` implements naive undersampling by randomly removing existing observations. \n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. 
\n\nFor default values of the hyper-parameters, model can be constructed by model = RandomUndersampler()\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of undersampling to be done for each class\n\n * Can be a float and in this case each class will be undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of real numbers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling depending on whether the input `X` is a matrix or table respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomUndersampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y; ref=\"minority\")\n 1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n 2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n 0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load RandomUndersampler\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n\n# wrap the model in a machine\nundersampler = RandomUndersampler(ratios=Dict(0=>1.0, 1=> 1.0, 2=>1.0), \n rng=42)\nmach = machine(undersampler)\n\n# provide the data to transform (there is nothing to fit)\nX_under, y_under = transform(mach, X, y)\n \njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n```\n""" -":name" = "RandomUndersampler" -":human_name" = "random undersampler" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" ":implemented_methods" = [":transform_scitype", ":transform"] -":hyperparameters" = "`(:ratios, :rng, :try_preserve_type)`" -":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" 
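Aside: the `ratios` convention documented for these resamplers reduces to a per-class target count; for undersamplers, the target is ratio × (minority class size). A sketch of how naive random undersampling to such targets could look, using `random_undersample_indices` as a hypothetical helper (not part of Imbalance.jl):

```julia
# Sketch of naive random undersampling under the `ratios` convention:
# keep ratio × (minority class size) observations per class, chosen
# uniformly at random. Hypothetical helper, not Imbalance.jl's code.
using Random

function random_undersample_indices(rng::AbstractRNG, y::AbstractVector, ratios::Dict)
    classes = unique(y)
    n_min = minimum(count(==(c), y) for c in classes)
    keep = Int[]
    for c in classes
        idx = shuffle(rng, findall(==(c), y))
        target = min(length(idx), round(Int, get(ratios, c, 1.0) * n_min))
        append!(keep, idx[1:target])
    end
    return sort(keep)
end

y = vcat(fill(0, 48), fill(1, 19), fill(2, 33))   # class sizes from the example above
keep = random_undersample_indices(Xoshiro(42), y, Dict(0 => 1.0, 1 => 1.0, 2 => 1.0))
length(keep)                                       # 57 == 3 classes × 19
```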
":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":constructor" = "`nothing`" [Imbalance.ENNUndersampler] -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Integer\", \"AbstractString\", \"Any\", \"Bool\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" +":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Imbalance" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" +":prediction_type" = ":unknown" ":load_path" = "Imbalance.MLJ.ENNUndersampler" -":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" -":package_url" = "https://github.com/JuliaAI/Imbalance.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """Initiate a ENN undersampling model with the given hyper-parameters.\n\n```\nENNUndersampler\n```\n\nA model type for constructing a enn undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nENNUndersampler = @load ENNUndersampler pkg=Imbalance\n```\n\nDo `model = ENNUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ENNUndersampler(k=...)`.\n\n`ENNUndersampler` undersamples a dataset by removing (\"cleaning\") points that violate a certain condition such as having a different class compared to the majority of the neighbors as proposed in Dennis L Wilson. Asymptotic properties of nearest neighbor rules using edited data. 
IEEE Transactions on Systems, Man, and Cybernetics, pages 408–421, 1972.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by \tmach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed by \tmodel = ENNUndersampler()\n\n# Hyperparameters\n\n * `k::Integer=5`: Number of nearest neighbors to consider in the algorithm. Should be within the range `0 < k < n` where n is the number of observations in the smallest class.\n\n * `keep_condition::AbstractString=\"mode\"`: The condition that leads to cleaning a point upon violation. Takes one of `\"exists\"`, `\"mode\"`, `\"only mode\"` and `\"all\"`\n\n```\n- `\"exists\"`: the point has at least one neighbor from the same class\n- `\"mode\"`: the class of the point is one of the most frequent classes of the neighbors (there may be many)\n- `\"only mode\"`: the class of the point is the single most frequent class of the neighbors\n- `\"all\"`: the class of the point is the same as all the neighbors\n```\n\n * `min_ratios=1.0`: A parameter that controls the maximum amount of undersampling to be done for each class. If this algorithm cleans the data to an extent that this is violated, some of the cleaned points will be revived randomly so that it is satisfied.\n\n * Can be a float and in this case each class will be at most undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float minimum ratio for that class\n\n * `force_min_ratios=false`: If `true`, and this algorithm cleans the data such that the ratios for each class exceed those specified in `min_ratios` then further undersampling will be perform so that the final ratios are equal to `min_ratios`.\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n * `try_preserve_type::Bool=true`: When `true`, the function will try to not change the type of the input table (e.g., `DataFrame`). However, for some tables, this may not succeed, and in this case, the table returned will be a column table (named-tuple of vectors). 
This parameter is ignored if the input is a matrix.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling depending on whether the input `X` is a matrix or table respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using ENNUndersampler, returning the undersampled versions\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n min_sep=0.01, stds=[3.0 3.0 3.0], class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y; ref=\"minority\")\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load ENN model type:\nENNUndersampler = @load ENNUndersampler pkg=Imbalance\n\n# underample the majority classes to sizes relative to the minority class:\nundersampler = ENNUndersampler(min_ratios=0.5, rng=42)\nmach = machine(undersampler)\nX_under, y_under = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 10 (100.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 10 (100.0%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 24 (240.0%) \n```\n""" -":name" = "ENNUndersampler" +":hyperparameters" = "`(:k, :keep_condition, :min_ratios, :force_min_ratios, :rng, :try_preserve_type)`" +":is_pure_julia" = "`true`" ":human_name" = "enn undersampler" ":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" -":implemented_methods" = [":transform_scitype", ":transform"] -":hyperparameters" = "`(:k, :keep_condition, :min_ratios, :force_min_ratios, :rng, :try_preserve_type)`" -":hyperparameter_types" = "`(\"Integer\", \"AbstractString\", \"Any\", \"Bool\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """Initiate an ENN undersampling model with the given hyper-parameters.\n\n```\nENNUndersampler\n```\n\nA model type for constructing an ENN undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nENNUndersampler = @load ENNUndersampler pkg=Imbalance\n```\n\nDo `model = ENNUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ENNUndersampler(k=...)`.\n\n`ENNUndersampler` undersamples a dataset by removing (\"cleaning\") points that violate a certain condition such as having a different class compared to the majority of the neighbors as proposed in Dennis L Wilson. Asymptotic properties of nearest neighbor rules using edited data. 
IEEE Transactions on Systems, Man, and Cybernetics, pages 408–421, 1972.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by \tmach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, the model can be constructed by \tmodel = ENNUndersampler()\n\n# Hyperparameters\n\n * `k::Integer=5`: Number of nearest neighbors to consider in the algorithm. Should be within the range `0 < k < n` where n is the number of observations in the smallest class.\n\n * `keep_condition::AbstractString=\"mode\"`: The condition that leads to cleaning a point upon violation. Takes one of `\"exists\"`, `\"mode\"`, `\"only mode\"` and `\"all\"`\n\n```\n- `\"exists\"`: the point has at least one neighbor from the same class\n- `\"mode\"`: the class of the point is one of the most frequent classes of the neighbors (there may be many)\n- `\"only mode\"`: the class of the point is the single most frequent class of the neighbors\n- `\"all\"`: the class of the point is the same as all the neighbors\n```\n\n * `min_ratios=1.0`: A parameter that controls the maximum amount of undersampling to be done for each class. If this algorithm cleans the data to an extent that this is violated, some of the cleaned points will be revived randomly so that it is satisfied.\n\n * Can be a float and in this case each class will be at most undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float minimum ratio for that class\n\n * `force_min_ratios=false`: If `true`, and this algorithm cleans the data such that the ratios for each class exceed those specified in `min_ratios` then further undersampling will be performed so that the final ratios are equal to `min_ratios`.\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses `MersenneTwister`.\n\n * `try_preserve_type::Bool=true`: When `true`, the function will try not to change the type of the input table (e.g., `DataFrame`). However, for some tables, this may not succeed, and in this case, the table returned will be a column table (named-tuple of vectors). 
This parameter is ignored if the input is a matrix.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling, depending on whether the input `X` is a matrix or a table, respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using ENNUndersampler, returning the undersampled versions\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n min_sep=0.01, stds=[3.0 3.0 3.0], class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y; ref=\"minority\")\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load ENN model type:\nENNUndersampler = @load ENNUndersampler pkg=Imbalance\n\n# undersample the majority classes to sizes relative to the minority class:\nundersampler = ENNUndersampler(min_ratios=0.5, rng=42)\nmach = machine(undersampler)\nX_under, y_under = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 10 (100.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 10 (100.0%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 24 (240.0%) \n```\n""" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" +":package_name" = "Imbalance" +":name" = "ENNUndersampler" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":transform_scitype", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":constructor" = "`nothing`" [Imbalance.BorderlineSMOTE1] -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"Integer\")`" +":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" 
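+# NOTE: a minimal sketch (not generated by @update) of the `m`/`k` distinction in
+# the docstring below: `m` is the neighborhood for the borderline ("danger") test,
+# `k` the neighborhood for SMOTE interpolation. The class labels and ratios are
+# assumed from the docstring example:
+#
+#     using MLJ
+#     BorderlineSMOTE1 = @load BorderlineSMOTE1 pkg=Imbalance
+#     oversampler = BorderlineSMOTE1(m=3, k=5, ratios=Dict(0=>1.0, 1=>0.9, 2=>0.8), rng=42)
+#     mach = machine(oversampler)  # static transformer: nothing to fit
+#     Xover, yover = transform(mach, X, y)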
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Imbalance" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" +":prediction_type" = ":unknown" ":load_path" = "Imbalance.MLJ.BorderlineSMOTE1" -":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameters" = "`(:m, :k, :ratios, :rng, :try_preserve_type, :verbosity)`" +":is_pure_julia" = "`true`" +":human_name" = "borderline smot e1" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """Initiate a BorderlineSMOTE1 model with the given hyper-parameters.\n\n```\nBorderlineSMOTE1\n```\n\nA model type for constructing a borderline smot e1, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBorderlineSMOTE1 = @load BorderlineSMOTE1 pkg=Imbalance\n```\n\nDo `model = BorderlineSMOTE1()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BorderlineSMOTE1(m=...)`.\n\n`BorderlineSMOTE1` implements the BorderlineSMOTE1 algorithm to correct for class imbalance as in Han, H., Wang, W.-Y., & Mao, B.-H. (2005). Borderline-SMOTE: A new over-sampling method in imbalanced data sets learning. In D.S. Huang, X.-P. Zhang, & G.-B. Huang (Eds.), Advances in Intelligent Computing (pp. 878-887). Springer. \n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = BorderlineSMOTE1()\n```\n\n# Hyperparameters\n\n * `m::Integer=5`: The number of neighbors to consider while checking the BorderlineSMOTE1 condition. Should be within the range `0 < m < N` where N is the number of observations in the data. It will be automatically set to `N-1` if `N ≤ m`.\n * `k::Integer=5`: Number of nearest neighbors to consider in the SMOTE part of the algorithm. Should be within the range `0 < k < n` where n is the number of observations in the smallest class. It will be automatically set to `l-1` for any class with `l` points where `l ≤ k`.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. 
By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses `MersenneTwister`.\n\n * `verbosity::Integer=1`: Whenever higher than `0`, info regarding the points that will participate in oversampling is logged.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling, depending on whether the input `X` is a matrix or a table, respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using BorderlineSMOTE1, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 1000, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n stds=[0.1 0.1 0.1], min_sep=0.01, class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 200 (40.8%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 310 (63.3%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 490 (100.0%) \n\n# load BorderlineSMOTE1\nBorderlineSMOTE1 = @load BorderlineSMOTE1 pkg=Imbalance\n\n# wrap the model in a machine\noversampler = BorderlineSMOTE1(m=3, k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 392 (80.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 441 (90.0%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 490 (100.0%) \n```\n""" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "Imbalance" +":name" = "BorderlineSMOTE1" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """Initiate a BorderlineSMOTE1 model with the given hyper-parameters.\n\n```\nBorderlineSMOTE1\n```\n\nA model type for constructing a borderline smot e1, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBorderlineSMOTE1 = @load BorderlineSMOTE1 pkg=Imbalance\n```\n\nDo `model = BorderlineSMOTE1()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BorderlineSMOTE1(m=...)`.\n\n`BorderlineSMOTE1` implements the BorderlineSMOTE1 algorithm to correct for class imbalance as in Han, H., Wang, W.-Y., & Mao, B.-H. (2005). Borderline-SMOTE: A new over-sampling method in imbalanced data sets learning. In D.S. Huang, X.-P. Zhang, & G.-B. 
Huang (Eds.), Advances in Intelligent Computing (pp. 878-887). Springer. \n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = BorderlineSMOTE1()\n```\n\n# Hyperparameters\n\n * `m::Integer=5`: The number of neighbors to consider while checking the BorderlineSMOTE1 condition. Should be within the range `0 < m < N` where N is the number of observations in the data. It will be automatically set to `N-1` if `N ≤ m`.\n * `k::Integer=5`: Number of nearest neighbors to consider in the SMOTE part of the algorithm. Should be within the range `0 < k < n` where n is the number of observations in the smallest class. It will be automatically set to `l-1` for any class with `l` points where `l ≤ k`.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n * `verbosity::Integer=1`: Whenever higher than `0` info regarding the points that will participate in oversampling is logged.\n\n# Transform Inputs\n\n * `X`: A matrix or table of floats where each row is an observation from the dataset\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. 
depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using BorderlineSMOTE1, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 1000, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n stds=[0.1 0.1 0.1], min_sep=0.01, class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 200 (40.8%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 310 (63.3%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 490 (100.0%) \n\n# load BorderlineSMOTE1\nBorderlineSMOTE1 = @load BorderlineSMOTE1 pkg=Imbalance\n\n# wrap the model in a machine\noversampler = BorderlineSMOTE1(m=3, k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 392 (80.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 441 (90.0%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 490 (100.0%) \n```\n""" -":name" = "BorderlineSMOTE1" -":human_name" = "borderline smot e1" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" ":implemented_methods" = [":transform_scitype", ":clean!", ":transform"] -":hyperparameters" = "`(:m, :k, :ratios, :rng, :try_preserve_type, :verbosity)`" -":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"Integer\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":constructor" = "`nothing`" [Imbalance.RandomWalkOversampler] -":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" -":output_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" +":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" -":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Imbalance" +":output_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" +":prediction_type" = ":unknown" ":load_path" = "Imbalance.MLJ.RandomWalkOversampler" -":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" +":hyperparameters" = "`(:ratios, :rng, :try_preserve_type)`" +":is_pure_julia" = "`true`" +":human_name" = "random walk oversampler" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """Initiate a RandomWalkOversampler model with the given hyper-parameters.\n\n```\nRandomWalkOversampler\n```\n\nA model type for constructing a random walk oversampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomWalkOversampler = @load RandomWalkOversampler pkg=Imbalance\n```\n\nDo `model = RandomWalkOversampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomWalkOversampler(ratios=...)`.\n\n`RandomWalkOversampler` implements the random walk oversampling algorithm to correct for class imbalance as in Zhang, H., & Li, M. (2014). RWO-Sampling: A random walk over-sampling approach to imbalanced data classification. Information Fusion, 25, 4-20.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = RandomWalkOversampler()\n```\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and\n\n```\n elements in continuous columns should subtype `Infinite` (i.e., have \n [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n```\n\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. 
depending on whether the input `X` is a matrix or a table, respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomWalkOversampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 3\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n\njulia> ScientificTypes.schema(X).scitypes\n(Continuous, Continuous, Continuous, Continuous, Continuous)\n# coerce nominal columns to a finite scitype (multiclass or ordered factor)\nX = coerce(X, :Column4=>Multiclass, :Column5=>Multiclass)\n\n# load RandomWalkOversampler model type:\nRandomWalkOversampler = @load RandomWalkOversampler pkg=Imbalance\n\n# oversample the minority classes to sizes relative to the majority class:\noversampler = RandomWalkOversampler(ratios = Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng = 42)\nmach = machine(oversampler)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%)\n```\n""" +":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "Imbalance" +":name" = "RandomWalkOversampler" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """Initiate a RandomWalkOversampler model with the given hyper-parameters.\n\n```\nRandomWalkOversampler\n```\n\nA model type for constructing a random walk oversampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomWalkOversampler = @load RandomWalkOversampler pkg=Imbalance\n```\n\nDo `model = RandomWalkOversampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomWalkOversampler(ratios=...)`.\n\n`RandomWalkOversampler` implements the random walk oversampling algorithm to correct for class imbalance as in Zhang, H., & Li, M. (2014). RWO-Sampling: A random walk over-sampling approach to imbalanced data classification. 
Information Fusion, 25, 4-20.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = RandomWalkOversampler()\n```\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and\n\n```\n elements in continuous columns should subtype `Infinite` (i.e., have \n [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n```\n\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomWalkOversampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 3\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\n\njulia> ScientificTypes.schema(X).scitypes\n(Continuous, Continuous, Continuous, Continuous, Continuous)\n# coerce nominal columns to a finite scitype (multiclass or ordered factor)\nX = coerce(X, :Column4=>Multiclass, :Column5=>Multiclass)\n\n# load RandomWalkOversampler model type:\nRandomWalkOversampler = @load RandomWalkOversampler pkg=Imbalance\n\n# oversample the minority classes to sizes relative to the majority class:\noversampler = RandomWalkOversampler(ratios = Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng = 42)\nmach = machine(oversampler)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%)\n```\n""" -":name" = "RandomWalkOversampler" -":human_name" = "random walk 
oversampler" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" ":implemented_methods" = [":transform_scitype", ":transform"] -":hyperparameters" = "`(:ratios, :rng, :try_preserve_type)`" -":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" +":transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" ":constructor" = "`nothing`" [MLJTuning.TunedModel] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`TunedModel`" +":hyperparameter_types" = "`(\"Union{MLJModelInterface.Probabilistic, MLJModelInterface.ProbabilisticSupervisedDetector, MLJModelInterface.ProbabilisticUnsupervisedDetector}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict}\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"ComputationalResources.AbstractResource\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Bool\", \"Bool\", \"Any\")`" +":package_uuid" = "03970b2e-30c4-11ea-3135-d1576263f10f" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJTuning" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "MLJTuning.TunedModel" -":package_uuid" = "03970b2e-30c4-11ea-3135-d1576263f10f" +":hyperparameters" = "`(:model, :tuning, :resampling, :measure, :weights, :class_weights, :operation, :range, :selection_heuristic, :train_best, :repeats, :n, :acceleration, :acceleration_resampling, :check_measure, :cache, :compact_history, :logger)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic tuned model" +":is_supervised" = "`true`" +":iteration_parameter" = ":n" +":docstring" = """```\ntuned_model = TunedModel(; model=,\n tuning=RandomSearch(),\n resampling=Holdout(),\n range=nothing,\n measure=nothing,\n n=default_n(tuning, range),\n operation=nothing,\n other_options...)\n```\n\nConstruct a model wrapper for hyper-parameter optimization of a supervised learner, specifying the `tuning` strategy and `model` whose hyper-parameters are to be mutated.\n\n```\ntuned_model = TunedModel(; 
models=,\n resampling=Holdout(),\n measure=nothing,\n n=length(models),\n operation=nothing,\n other_options...)\n```\n\nConstruct a wrapper for multiple `models`, for selection of an optimal one (equivalent to specifying `tuning=Explicit()` and `range=models` above). Elements of the iterator `models` need not have a common type, but they must all be `Deterministic` or all be `Probabilistic` *and this is not checked* but inferred from the first element generated.\n\nSee below for a complete list of options.\n\n### Training\n\nCalling `fit!(mach)` on a machine `mach=machine(tuned_model, X, y)` or `mach=machine(tuned_model, X, y, w)` will:\n\n * Instigate a search, over clones of `model`, with the hyperparameter mutations specified by `range`, for a model optimizing the specified `measure`, using performance evaluations carried out using the specified `tuning` strategy and `resampling` strategy. In the case `models` is explicitly listed, the search is instead over the models generated by the iterator `models`.\n * Fit an internal machine, based on the optimal model `fitted_params(mach).best_model`, wrapping the optimal `model` object in *all* the provided data `X`, `y`(, `w`). Calling `predict(mach, Xnew)` then returns predictions on `Xnew` of this internal machine. The final training can be suppressed by setting `train_best=false`.\n\n### Search space\n\nThe `range` objects supported depend on the `tuning` strategy specified. Query the `strategy` docstring for details. To optimize over an explicit list `v` of models of the same type, use `strategy=Explicit()` and specify `model=v[1]` and `range=v`.\n\nThe number of models searched is specified by `n`. If unspecified, then `MLJTuning.default_n(tuning, range)` is used. When `n` is increased and `fit!(mach)` called again, the old search history is re-instated and the search continues where it left off.\n\n### Measures (metrics)\n\nIf more than one `measure` is specified, then only the first is optimized (unless `strategy` is multi-objective) but the performance against every measure specified will be computed and reported in `report(mach).best_performance` and other relevant attributes of the generated report. Options exist to pass per-observation weights or class weights to measures; see below.\n\n*Important.* If a custom measure `my_measure` is used, and the measure is a score, rather than a loss, be sure to check that `MLJ.orientation(my_measure) == :score` to ensure maximization of the measure, rather than minimization. 
Override an incorrect value with `MLJ.orientation(::typeof(my_measure)) = :score`.\n\n### Accessing the fitted parameters and other training (tuning) outcomes\n\nA Plots.jl plot of performance estimates is returned by `plot(mach)` or `heatmap(mach)`.\n\nOnce a tuning machine `mach` has been trained as above, then `fitted_params(mach)` has these keys/values:\n\n| key | value |\n| --------------------:| ---------------------------------------:|\n| `best_model` | optimal model instance |\n| `best_fitted_params` | learned parameters of the optimal model |\n\nThe named tuple `report(mach)` includes these keys/values:\n\n| key | value |\n| --------------------:| ------------------------------------------------------------------:|\n| `best_model` | optimal model instance |\n| `best_history_entry` | corresponding entry in the history, including performance estimate |\n| `best_report` | report generated by fitting the optimal model to all data |\n| `history` | tuning strategy-specific history of all evaluations |\n\nplus other key/value pairs specific to the `tuning` strategy.\n\nEach element of `history` is a property-accessible object with these properties:\n\n| key | value |\n| -------------:| -----------------------------------------------------------------:|\n| `measure` | vector of measures (metrics) |\n| `measurement` | vector of measurements, one per measure |\n| `per_fold` | vector of vectors of unaggregated per-fold measurements |\n| `evaluation` | full `PerformanceEvaluation`/`CompactPerformanceEvaluation` object |\n\n### Complete list of key-word options\n\n * `model`: `Supervised` model prototype that is cloned and mutated to generate models for evaluation\n * `models`: Alternatively, an iterator of MLJ models to be explicitly evaluated. These may have varying types.\n * `tuning=RandomSearch()`: tuning strategy to be applied (eg, `Grid()`). See the [Tuning Models](https://alan-turing-institute.github.io/MLJ.jl/dev/tuning_models/#Tuning-Models) section of the MLJ manual for a complete list of options.\n * `resampling=Holdout()`: resampling strategy (eg, `Holdout()`, `CV()`, `StratifiedCV()`) to be applied in performance evaluations\n * `measure`: measure or measures to be applied in performance evaluations; only the first used in optimization (unless the strategy is multi-objective) but all reported to the history\n * `weights`: per-observation weights to be passed to the measure(s) in performance evaluations, where supported. Check support with `supports_weights(measure)`.\n * `class_weights`: class weights to be passed to the measure(s) in performance evaluations, where supported. Check support with `supports_class_weights(measure)`.\n * `repeats=1`: for generating train/test sets multiple times in resampling (\"Monte Carlo\" resampling); see [`evaluate!`](@ref) for details\n * `operation`/`operations` - One of `predict`, `predict_mean`, `predict_mode`, `predict_median`, or `predict_joint`, or a vector of these of the same length as `measure`/`measures`. Automatically inferred if left unspecified.\n * `range`: range object; tuning strategy documentation describes supported types\n * `selection_heuristic`: the rule determining how the best model is decided. According to the default heuristic, `NaiveSelection()`, `measure` (or the first element of `measure`) is evaluated for each resample and these per-fold measurements are aggregated. The model with the lowest (resp. 
a `:score`).\n * `n`: number of iterations (ie, models to be evaluated); set by tuning strategy if left unspecified\n * `train_best=true`: whether to train the optimal model\n * `acceleration=default_resource()`: mode of parallelization for tuning strategies that support this\n * `acceleration_resampling=CPU1()`: mode of parallelization for resampling\n * `check_measure=true`: whether to check `measure` is compatible with the specified `model` and `operation`\n * `cache=true`: whether to cache model-specific representations of user-supplied data; set to `false` to conserve memory. Speed gains likely limited to the case `resampling isa Holdout`.\n * `compact_history=true`: whether to write [`CompactPerformanceEvaluation`](@ref) or regular [`PerformanceEvaluation`](@ref) objects to the history (accessed via the `:evaluation` key); the compact form excludes some fields to conserve memory.\n * `logger=default_logger()`: a logger for externally reporting model performance evaluations, such as an `MLJFlow.Logger` instance. On startup, `default_logger()=nothing`; use `default_logger(logger)` to set a global logger.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJTuning.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" +":package_name" = "MLJTuning" +":name" = "TunedModel" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\ntuned_model = TunedModel(; model=,\n tuning=RandomSearch(),\n resampling=Holdout(),\n range=nothing,\n measure=nothing,\n n=default_n(tuning, range),\n operation=nothing,\n other_options...)\n```\n\nConstruct a model wrapper for hyper-parameter optimization of a supervised learner, specifying the `tuning` strategy and `model` whose hyper-parameters are to be mutated.\n\n```\ntuned_model = TunedModel(; models=,\n resampling=Holdout(),\n measure=nothing,\n n=length(models),\n operation=nothing,\n other_options...)\n```\n\nConstruct a wrapper for multiple `models`, for selection of an optimal one (equivalent to specifying `tuning=Explicit()` and `range=models` above). Elements of the iterator `models` need not have a common type, but they must all be `Deterministic` or all be `Probabilistic` *and this is not checked* but inferred from the first element generated.\n\nSee below for a complete list of options.\n\n### Training\n\nCalling `fit!(mach)` on a machine `mach=machine(tuned_model, X, y)` or `mach=machine(tuned_model, X, y, w)` will:\n\n * Instigate a search, over clones of `model`, with the hyperparameter mutations specified by `range`, for a model optimizing the specified `measure`, using performance evaluations carried out using the specified `tuning` strategy and `resampling` strategy. In the case `models` is explictly listed, the search is instead over the models generated by the iterator `models`.\n * Fit an internal machine, based on the optimal model `fitted_params(mach).best_model`, wrapping the optimal `model` object in *all* the provided data `X`, `y`(, `w`). Calling `predict(mach, Xnew)` then returns predictions on `Xnew` of this internal machine. The final train can be supressed by setting `train_best=false`.\n\n### Search space\n\nThe `range` objects supported depend on the `tuning` strategy specified. Query the `strategy` docstring for details. 
To optimize over an explicit list `v` of models of the same type, use `strategy=Explicit()` and specify `model=v[1]` and `range=v`.\n\nThe number of models searched is specified by `n`. If unspecified, then `MLJTuning.default_n(tuning, range)` is used. When `n` is increased and `fit!(mach)` called again, the old search history is re-instated and the search continues where it left off.\n\n### Measures (metrics)\n\nIf more than one `measure` is specified, then only the first is optimized (unless `strategy` is multi-objective) but the performance against every measure specified will be computed and reported in `report(mach).best_performance` and other relevant attributes of the generated report. Options exist to pass per-observation weights or class weights to measures; see below.\n\n*Important.* If a custom measure, `my_measure` is used, and the measure is a score, rather than a loss, be sure to check that `MLJ.orientation(my_measure) == :score` to ensure maximization of the measure, rather than minimization. Override an incorrect value with `MLJ.orientation(::typeof(my_measure)) = :score`.\n\n### Accessing the fitted parameters and other training (tuning) outcomes\n\nA Plots.jl plot of performance estimates is returned by `plot(mach)` or `heatmap(mach)`.\n\nOnce a tuning machine `mach` has bee trained as above, then `fitted_params(mach)` has these keys/values:\n\n| key | value |\n| --------------------:| ---------------------------------------:|\n| `best_model` | optimal model instance |\n| `best_fitted_params` | learned parameters of the optimal model |\n\nThe named tuple `report(mach)` includes these keys/values:\n\n| key | value |\n| --------------------:| ------------------------------------------------------------------:|\n| `best_model` | optimal model instance |\n| `best_history_entry` | corresponding entry in the history, including performance estimate |\n| `best_report` | report generated by fitting the optimal model to all data |\n| `history` | tuning strategy-specific history of all evaluations |\n\nplus other key/value pairs specific to the `tuning` strategy.\n\nEach element of `history` is a property-accessible object with these properties:\n\n| key | value |\n| -------------:| -----------------------------------------------------------------:|\n| `measure` | vector of measures (metrics) |\n| `measurement` | vector of measurements, one per measure |\n| `per_fold` | vector of vectors of unaggregated per-fold measurements |\n| `evaluation` | full `PerformanceEvaluation`/`CompactPerformaceEvaluation` object |\n\n### Complete list of key-word options\n\n * `model`: `Supervised` model prototype that is cloned and mutated to generate models for evaluation\n * `models`: Alternatively, an iterator of MLJ models to be explicitly evaluated. These may have varying types.\n * `tuning=RandomSearch()`: tuning strategy to be applied (eg, `Grid()`). See the [Tuning Models](https://alan-turing-institute.github.io/MLJ.jl/dev/tuning_models/#Tuning-Models) section of the MLJ manual for a complete list of options.\n * `resampling=Holdout()`: resampling strategy (eg, `Holdout()`, `CV()`), `StratifiedCV()`) to be applied in performance evaluations\n * `measure`: measure or measures to be applied in performance evaluations; only the first used in optimization (unless the strategy is multi-objective) but all reported to the history\n * `weights`: per-observation weights to be passed the measure(s) in performance evaluations, where supported. 
Check support with `supports_weights(measure)`.\n * `class_weights`: class weights to be passed the measure(s) in performance evaluations, where supported. Check support with `supports_class_weights(measure)`.\n * `repeats=1`: for generating train/test sets multiple times in resampling (\"Monte Carlo\" resampling); see [`evaluate!`](@ref) for details\n * `operation`/`operations` - One of `predict`, `predict_mean`, `predict_mode`, `predict_median`, or `predict_joint`, or a vector of these of the same length as `measure`/`measures`. Automatically inferred if left unspecified.\n * `range`: range object; tuning strategy documentation describes supported types\n * `selection_heuristic`: the rule determining how the best model is decided. According to the default heuristic, `NaiveSelection()`, `measure` (or the first element of `measure`) is evaluated for each resample and these per-fold measurements are aggregrated. The model with the lowest (resp. highest) aggregate is chosen if the measure is a `:loss` (resp. a `:score`).\n * `n`: number of iterations (ie, models to be evaluated); set by tuning strategy if left unspecified\n * `train_best=true`: whether to train the optimal model\n * `acceleration=default_resource()`: mode of parallelization for tuning strategies that support this\n * `acceleration_resampling=CPU1()`: mode of parallelization for resampling\n * `check_measure=true`: whether to check `measure` is compatible with the specified `model` and `operation`)\n * `cache=true`: whether to cache model-specific representations of user-suplied data; set to `false` to conserve memory. Speed gains likely limited to the case `resampling isa Holdout`.\n * `compact_history=true`: whether to write `CompactPerformanceEvaluation`](@ref) or regular [`PerformanceEvaluation`](@ref) objects to the history (accessed via the `:evaluation` key); the compact form excludes some fields to conserve memory.\n * `logger=default_logger()`: a logger for externally reporting model performance evaluations, such as an `MLJFlow.Logger` instance. 
On startup, `default_logger()=nothing`; use `default_logger(logger)` to set a global logger.\n""" -":name" = "TunedModel" -":human_name" = "probabilistic tuned model" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [] -":hyperparameters" = "`(:model, :tuning, :resampling, :measure, :weights, :class_weights, :operation, :range, :selection_heuristic, :train_best, :repeats, :n, :acceleration, :acceleration_resampling, :check_measure, :cache, :compact_history, :logger)`" -":hyperparameter_types" = "`(\"Union{MLJModelInterface.Probabilistic, MLJModelInterface.ProbabilisticSupervisedDetector, MLJModelInterface.ProbabilisticUnsupervisedDetector}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict}\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"ComputationalResources.AbstractResource\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Bool\", \"Bool\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":n" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`TunedModel`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`true`" [FeatureSelection.FeatureSelector] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Table`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Function, Vector{Symbol}}\", \"Bool\")`" +":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "FeatureSelection" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "FeatureSelection.FeatureSelector" -":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" +":hyperparameters" = "`(:features, :ignore)`" +":is_pure_julia" = "`true`" +":human_name" = "feature selector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nFeatureSelector\n```\n\nA model type for constructing a feature selector, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFeatureSelector = @load FeatureSelector pkg=unknown\n```\n\nDo `model = FeatureSelector()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `FeatureSelector(features=...)`.\n\nUse this model to select features (columns) of a table, usually as part of a model `Pipeline`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features, where \"table\" is in the sense of Tables.jl\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated:\n\n * `[]` (empty, the default): filter out all features (columns) which were not encountered in training\n * non-empty vector of feature names (symbols): keep only the specified features (`ignore=false`) or keep only unspecified features (`ignore=true`)\n * function or other callable: keep a feature if the callable returns `true` on its name. For example, specifying `FeatureSelector(features = name -> name in [:x1, :x3], ignore = true)` has the same effect as `FeatureSelector(features = [:x1, :x3], ignore = true)`, namely to select all features, with the exception of `:x1` and `:x3`.\n * `ignore`: whether to ignore or keep specified `features`, as explained above\n\n# Operations\n\n * `transform(mach, Xnew)`: select features from the table `Xnew` as specified by the model, taking features seen during training into account, if relevant\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: the features that will be selected\n\n# Example\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([\"x\", \"y\", \"x\"], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\nselector = FeatureSelector(features=[:ordinal3, ], ignore=true);\n\njulia> transform(fit!(machine(selector, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[\"x\", \"y\", \"x\"],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" ":package_url" = "https://github.com/JuliaAI/FeatureSelection.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "FeatureSelection" +":name" = "FeatureSelector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nFeatureSelector\n```\n\nA model type for constructing a feature selector, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFeatureSelector = @load FeatureSelector pkg=unknown\n```\n\nDo `model = FeatureSelector()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `FeatureSelector(features=...)`.\n\nUse this model to select features (columns) of a table, usually as part of a model `Pipeline`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features, where \"table\" is in the sense of Tables.jl\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated:\n\n * `[]` (empty, the default): filter out all features (columns) which were not encountered in training\n * non-empty vector of feature names (symbols): keep only the specified features (`ignore=false`) or keep only unspecified features (`ignore=true`)\n * function or other callable: keep a feature if the callable returns `true` on its name. For example, specifying `FeatureSelector(features = name -> name in [:x1, :x3], ignore = true)` has the same effect as `FeatureSelector(features = [:x1, :x3], ignore = true)`, namely to select all features, with the exception of `:x1` and `:x3`.\n * `ignore`: whether to ignore or keep specified `features`, as explained above\n\n# Operations\n\n * `transform(mach, Xnew)`: select features from the table `Xnew` as specified by the model, taking features seen during training into account, if relevant\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: the features that will be selected\n\n# Example\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([\"x\", \"y\", \"x\"], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\nselector = FeatureSelector(features=[:ordinal3, ], ignore=true);\n\njulia> transform(fit!(machine(selector, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[\"x\", \"y\", \"x\"],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\n```\n""" -":name" = "FeatureSelector" -":human_name" = "feature selector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:features, :ignore)`" -":hyperparameter_types" = "`(\"Union{Function, Vector{Symbol}}\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" [FeatureSelection.RecursiveFeatureElimination] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`RecursiveFeatureElimination`" +":hyperparameter_types" = "`(\"MLJModelInterface.Supervised\", \"Float64\", \"Float64\")`" +":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" 
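The `FeatureSelector` docstring above describes the callable form of `features` only in prose; the following is a minimal hedged sketch of that form, assuming only that MLJ.jl (which re-exports `FeatureSelector`) is in the active environment, with invented column names for illustration:

```julia
using MLJ

X = (x1 = [1, 2, 3], x2 = [10.0, 20.0, 30.0], x3 = ["a", "b", "c"])

# the callable is applied to each column name; with `ignore = true`, the
# features for which it returns `true` are dropped rather than kept:
selector = FeatureSelector(features = name -> endswith(string(name), "1"), ignore = true)
mach = fit!(machine(selector, X))
transform(mach, X)  # expected: a table with columns :x2 and :x3 only
```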
+":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "FeatureSelection" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "FeatureSelection.RecursiveFeatureElimination" -":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" -":package_url" = "https://github.com/JuliaAI/FeatureSelection.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nRecursiveFeatureElimination(model; n_features=0, step=1)\n```\n\nThis model implements a recursive feature elimination algorithm for feature selection. It recursively removes features, training a base model on the remaining features and evaluating their importance until the desired number of features is selected.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `rfe_model` to data with\n\n```\nmach = machine(rfe_model, X, y)\n```\n\nOR, if the base model supports weights, as\n\n```\nmach = machine(rfe_model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of the scitype as that required by the base model; check column scitypes with `schema(X)` and column scitypes required by base model with `input_scitype(basemodel)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous` or `Finite` depending on the `target_scitype` required by the base model; check the scitype with `scitype(y)`.\n * `w` is the observation weights which can either be `nothing`(default) or an `AbstractVector` whoose element scitype is `Count` or `Continuous`. This is different from `weights` kernel which is an hyperparameter to the model, see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * model: A base model with a `fit` method that provides information on feature feature importance (i.e `reports_feature_importances(model) == true`)\n * n_features::Real = 0: The number of features to select. If `0`, half of the features are selected. If a positive integer, the parameter is the absolute number of features to select. If a real number between 0 and 1, it is the fraction of features to select.\n * step::Real=1: If the value of step is at least 1, it signifies the quantity of features to eliminate in each iteration. 
Conversely, if step falls strictly within the range of 0.0 to 1.0, it denotes the proportion (rounded down) of features to remove during each iteration.\n\n# Operations\n\n * `transform(mach, X)`: transform the input table `X` into a new table containing only columns corresponding to features accepted by the RFE algorithm.\n * `predict(mach, X)`: transform the input table `X` into a new table same as in `transform(mach, X)` above and predict using the fitted base model on the transformed table.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_left`: names of features remaining after recursive feature elimination.\n * `model_fitresult`: fitted parameters of the base model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `scores`: dictionary of scores for each feature in the training dataset. The model deems highly scored variables more significant.\n * `model_report`: report for the fitted base model.\n\n# Examples\n\nThe following example assumes you have MLJDecisionTreeInterface in the active package ennvironment.\n\n```\nusing MLJ\n\nRandomForestRegressor = @load RandomForestRegressor pkg=DecisionTree\n\n# Creates a dataset where the target only depends on the first 5 columns of the input table.\nA = rand(50, 10);\ny = 10 .* sin.(\n pi .* A[:, 1] .* A[:, 2]\n ) + 20 .* (A[:, 3] .- 0.5).^ 2 .+ 10 .* A[:, 4] .+ 5 * A[:, 5];\nX = MLJ.table(A);\n\n# fit a rfe model:\nrf = RandomForestRegressor()\nselector = RecursiveFeatureElimination(rf, n_features=2)\nmach = machine(selector, X, y)\nfit!(mach)\n\n# view the feature importances\nfeature_importances(mach)\n\n# predict using the base model trained on the reduced feature set:\nXnew = MLJ.table(rand(50, 10));\npredict(mach, Xnew)\n\n# transform data with all features to the reduced feature set:\ntransform(mach, Xnew)\n```\n""" -":name" = "RecursiveFeatureElimination" +":hyperparameters" = "`(:model, :n_features, :step)`" +":is_pure_julia" = "`true`" ":human_name" = "deterministic recursive feature elimination" ":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :n_features, :step)`" -":hyperparameter_types" = "`(\"MLJModelInterface.Supervised\", \"Float64\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":docstring" = """```\nRecursiveFeatureElimination(model; n_features=0, step=1)\n```\n\nThis model implements a recursive feature elimination algorithm for feature selection. 
It recursively removes features, training a base model on the remaining features and evaluating their importance until the desired number of features is selected.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `rfe_model` to data with\n\n```\nmach = machine(rfe_model, X, y)\n```\n\nOR, if the base model supports weights, as\n\n```\nmach = machine(rfe_model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns have the scitype required by the base model; check column scitypes with `schema(X)`, and the column scitypes required by the base model with `input_scitype(basemodel)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous` or `Finite`, depending on the `target_scitype` required by the base model; check the scitype with `scitype(y)`.\n * `w` is the observation weights, which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. This is different from the `weights` kernel, which is a hyperparameter of the model; see below.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * model: A base model with a `fit` method that provides information on feature importance (i.e., `reports_feature_importances(model) == true`)\n * n_features::Real = 0: The number of features to select. If `0`, half of the features are selected. If a positive integer, the parameter is the absolute number of features to select. If a real number between 0 and 1, it is the fraction of features to select.\n * step::Real=1: If the value of step is at least 1, it signifies the quantity of features to eliminate in each iteration. Conversely, if step falls strictly within the range of 0.0 to 1.0, it denotes the proportion (rounded down) of features to remove during each iteration.\n\n# Operations\n\n * `transform(mach, X)`: transform the input table `X` into a new table containing only columns corresponding to features accepted by the RFE algorithm.\n * `predict(mach, X)`: transform the input table `X` into a new table, as in `transform(mach, X)` above, and predict using the fitted base model on the transformed table.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_left`: names of features remaining after recursive feature elimination.\n * `model_fitresult`: fitted parameters of the base model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `scores`: dictionary of scores for each feature in the training dataset. 
The model deems highly scored variables more significant.\n * `model_report`: report for the fitted base model.\n\n# Examples\n\nThe following example assumes you have MLJDecisionTreeInterface in the active package environment.\n\n```\nusing MLJ\n\nRandomForestRegressor = @load RandomForestRegressor pkg=DecisionTree\n\n# Creates a dataset where the target only depends on the first 5 columns of the input table.\nA = rand(50, 10);\ny = 10 .* sin.(\n    pi .* A[:, 1] .* A[:, 2]\n    ) + 20 .* (A[:, 3] .- 0.5).^ 2 .+ 10 .* A[:, 4] .+ 5 * A[:, 5];\nX = MLJ.table(A);\n\n# fit an RFE model:\nrf = RandomForestRegressor()\nselector = RecursiveFeatureElimination(rf, n_features=2)\nmach = machine(selector, X, y)\nfit!(mach)\n\n# view the feature importances\nfeature_importances(mach)\n\n# predict using the base model trained on the reduced feature set:\nXnew = MLJ.table(rand(50, 10));\npredict(mach, Xnew)\n\n# transform data with all features to the reduced feature set:\ntransform(mach, Xnew)\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/FeatureSelection.jl" +":package_name" = "FeatureSelection" +":name" = "RecursiveFeatureElimination" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`RecursiveFeatureElimination`" - -[Clustering.HierarchicalClustering] -":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{}`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Clustering" +":is_wrapper" = "`true`" + +[EvoLinear.EvoSplineRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Symbol\", \"Any\", \"Any\", \"Union{Nothing, Dict}\", \"Any\", \"Symbol\")`" +":package_uuid" = "ab853011-1780-437f-b4b5-5de6f4777246" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "EvoLinear.EvoSplineRegressor" +":hyperparameters" = 
"`(:nrounds, :opt, :batchsize, :act, :eta, :L2, :knots, :rng, :device)`" +":is_pure_julia" = "`true`" +":human_name" = "evo spline regressor" +":is_supervised" = "`true`" +":iteration_parameter" = ":nrounds" +":docstring" = """```\nEvoSplineRegressor(; kwargs...)\n```\n\nA model type for constructing a EvoSplineRegressor, based on [EvoLinear.jl](https://github.com/jeremiedb/EvoLinear.jl), and implementing both an internal API and the MLJ model interface.\n\n# Keyword arguments\n\n * `loss=:mse`: loss function to be minimised. Can be one of:\n\n * `:mse`\n * `:logistic`\n * `:poisson`\n * `:gamma`\n * `:tweedie`\n * `nrounds=10`: maximum number of training rounds.\n * `eta=1`: Learning rate. Typically in the range `[1e-2, 1]`.\n * `L1=0`: Regularization penalty applied by shrinking to 0 weight update if update is < L1. No penalty if update > L1. Results in sparse feature selection. Typically in the `[0, 1]` range on normalized features.\n * `L2=0`: Regularization penalty applied to the squared of the weight update value. Restricts large parameter values. Typically in the `[0, 1]` range on normalized features.\n * `rng=123`: random seed. Not used at the moment.\n * `updater=:all`: training method. Only `:all` is supported at the moment. Gradients for each feature are computed simultaneously, then bias is updated based on all features update.\n * `device=:cpu`: Only `:cpu` is supported at the moment.\n\n# Internal API\n\nDo `config = EvoSplineRegressor()` to construct an hyper-parameter struct with default hyper-parameters. Provide keyword arguments as listed above to override defaults, for example:\n\n```julia\nEvoSplineRegressor(loss=:logistic, L1=1e-3, L2=1e-2, nrounds=100)\n```\n\n## Training model\n\nA model is built using [`fit`](@ref):\n\n```julia\nconfig = EvoSplineRegressor()\nm = fit(config; x, y, w)\n```\n\n## Inference\n\nFitted results is an `EvoLinearModel` which acts as a prediction function when passed a features matrix as argument. \n\n```julia\npreds = m(x)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoSplineRegressor = @load EvoSplineRegressor pkg=EvoLinear\n```\n\nDo `model = EvoLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoSplineRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where: \n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given\n\nfeatures `Xnew` having the same scitype as `X` above. 
Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: the `SplineModel` object returned by EvoSplineRegressor fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:coef`: Vector of coefficients (βs) associated to each of the features.\n * `:bias`: Value of the bias.\n * `:names`: Names of each of the features.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/jeremiedb/EvoLinear.jl" +":package_name" = "EvoLinear" +":name" = "EvoSplineRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nHierarchicalClustering\n```\n\nA model type for constructing a hierarchical clusterer, based on [Clustering.jl](https://github.com/JuliaStats/Clustering.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nHierarchicalClustering = @load HierarchicalClustering pkg=Clustering\n```\n\nDo `model = HierarchicalClustering()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `HierarchicalClustering(linkage=...)`.\n\n[Hierarchical Clustering](https://en.wikipedia.org/wiki/Hierarchical_clustering) is a clustering algorithm that organizes the data in a dendrogram based on distances between groups of points and computes cluster assignments by cutting the dendrogram at a given height. More information is available at the [Clustering.jl documentation](https://juliastats.org/Clustering.jl/stable/index.html). Use `predict` to get cluster assignments. The dendrogram and the dendrogram cutter are accessed from the machine report (see below).\n\nThis is a static implementation, i.e., it does not generalize to new data instances, and there is no training data. For clusterers that do generalize, see [`KMeans`](@ref) or [`KMedoids`](@ref).\n\nIn MLJ or MLJBase, create a machine with\n\n```\nmach = machine(model)\n```\n\n# Hyper-parameters\n\n * `linkage = :single`: linkage method (:single, :average, :complete, :ward, :ward_presquared)\n * `metric = SqEuclidean`: metric (see `Distances.jl` for available metrics)\n * `branchorder = :r`: branchorder (:r, :barjoseph, :optimal)\n * `h = nothing`: height at which the dendrogram is cut\n * `k = 3`: number of clusters.\n\nIf both `k` and `h` are specified, it is guaranteed that the number of clusters is not less than `k` and their height is not above `h`.\n\n# Operations\n\n * `predict(mach, X)`: return cluster label assignments, as an unordered `CategoricalVector`. 
Here `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\n# Report\n\nAfter calling `predict(mach)`, the fields of `report(mach)` are:\n\n * `dendrogram`: the dendrogram that was computed when calling `predict`.\n * `cutter`: a dendrogram cutter that can be called with a height `h` or a number of clusters `k`, to obtain a new assignment of the data points to clusters (see example below).\n\n# Examples\n\n```julia\nusing MLJ\n\nX, labels = make_moons(400, noise=0.09, rng=1) # synthetic data with 2 clusters; X\n\nHierarchicalClustering = @load HierarchicalClustering pkg=Clustering\nmodel = HierarchicalClustering(linkage = :complete)\nmach = machine(model)\n\n# compute and output cluster assignments for observations in `X`:\nyhat = predict(mach, X)\n\n# plot dendrogram:\nusing StatsPlots\nplot(report(mach).dendrogram)\n\n# make new predictions by cutting the dendrogram at another height\nreport(mach).cutter(h = 2.5)\n```\n""" -":name" = "HierarchicalClustering" -":human_name" = "hierarchical clusterer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" -":implemented_methods" = [":clean!", ":predict"] -":hyperparameters" = "`(:linkage, :metric, :branchorder, :h, :k)`" -":hyperparameter_types" = "`(\"Symbol\", \"Distances.SemiMetric\", \"Symbol\", \"Union{Nothing, Float64}\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":fit", ":predict", ":update"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict,)`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" -[Clustering.DBSCAN] -":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +[EvoLinear.EvoLinearRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Any\", \"Any\", \"Any\", \"Any\", \"Symbol\")`" +":package_uuid" = "ab853011-1780-437f-b4b5-5de6f4777246" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" 
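The `EvoSplineRegressor` entry above records its traits and construction, but stops short of an end-to-end MLJ workflow. The following is a hedged sketch, assuming EvoLinear.jl is installed and registered as in this patch; `make_regression` is MLJ's synthetic-data helper, and the hyper-parameter value is arbitrary:

```julia
using MLJ

EvoSplineRegressor = @load EvoSplineRegressor pkg=EvoLinear

X, y = make_regression(200, 4)           # synthetic table of Continuous features, Continuous target
model = EvoSplineRegressor(nrounds=50)   # :nrounds is the iteration parameter recorded above
mach = fit!(machine(model, X, y))
yhat = predict(mach, X)                  # deterministic point predictions, per ":prediction_type"
```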
-":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Clustering" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":load_path" = "MLJClusteringInterface.DBSCAN" -":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" -":package_url" = "https://github.com/JuliaStats/Clustering.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":prediction_type" = ":deterministic" +":load_path" = "EvoLinear.EvoLinearRegressor" +":hyperparameters" = "`(:updater, :nrounds, :eta, :L1, :L2, :rng, :device)`" +":is_pure_julia" = "`true`" +":human_name" = "evo linear regressor" +":is_supervised" = "`true`" +":iteration_parameter" = ":nrounds" +":docstring" = """```\nEvoLinearRegressor(; kwargs...)\n```\n\nA model type for constructing a EvoLinearRegressor, based on [EvoLinear.jl](https://github.com/jeremiedb/EvoLinear.jl), and implementing both an internal API and the MLJ model interface.\n\n# Keyword arguments\n\n * `loss=:mse`: loss function to be minimised. Can be one of:\n\n * `:mse`\n * `:logistic`\n * `:poisson`\n * `:gamma`\n * `:tweedie`\n * `nrounds=10`: maximum number of training rounds.\n * `eta=1`: Learning rate. Typically in the range `[1e-2, 1]`.\n * `L1=0`: Regularization penalty applied by shrinking to 0 weight update if update is < L1. No penalty if update > L1. Results in sparse feature selection. Typically in the `[0, 1]` range on normalized features.\n * `L2=0`: Regularization penalty applied to the squared of the weight update value. Restricts large parameter values. Typically in the `[0, 1]` range on normalized features.\n * `rng=123`: random seed. Not used at the moment.\n * `updater=:all`: training method. Only `:all` is supported at the moment. Gradients for each feature are computed simultaneously, then bias is updated based on all features update.\n * `device=:cpu`: Only `:cpu` is supported at the moment.\n\n# Internal API\n\nDo `config = EvoLinearRegressor()` to construct an hyper-parameter struct with default hyper-parameters. Provide keyword arguments as listed above to override defaults, for example:\n\n```julia\nEvoLinearRegressor(loss=:logistic, L1=1e-3, L2=1e-2, nrounds=100)\n```\n\n## Training model\n\nA model is built using [`fit`](@ref):\n\n```julia\nconfig = EvoLinearRegressor()\nm = fit(config; x, y, w)\n```\n\n## Inference\n\nFitted results is an `EvoLinearModel` which acts as a prediction function when passed a features matrix as argument. \n\n```julia\npreds = m(x)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoLinearRegressor = @load EvoLinearRegressor pkg=EvoLinear\n```\n\nDo `model = EvoLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoLinearRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where: \n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given\n\nfeatures `Xnew` having the same scitype as `X` above. 
Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: the `EvoLinearModel` object returned by EvoLnear.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:coef`: Vector of coefficients (βs) associated to each of the features.\n * `:bias`: Value of the bias.\n * `:names`: Names of each of the features.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/jeremiedb/EvoLinear.jl" +":package_name" = "EvoLinear" +":name" = "EvoLinearRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nDBSCAN\n```\n\nA model type for constructing a DBSCAN clusterer (density-based spatial clustering of applications with noise), based on [Clustering.jl](https://github.com/JuliaStats/Clustering.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nDBSCAN = @load DBSCAN pkg=Clustering\n```\n\nDo `model = DBSCAN()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `DBSCAN(radius=...)`.\n\n[DBSCAN](https://en.wikipedia.org/wiki/DBSCAN) is a clustering algorithm that groups together points that are closely packed together (points with many nearby neighbors), marking as outliers points that lie alone in low-density regions (whose nearest neighbors are too far away). More information is available at the [Clustering.jl documentation](https://juliastats.org/Clustering.jl/stable/index.html). Use `predict` to get cluster assignments. Point types - core, boundary or noise - are accessed from the machine report (see below).\n\nThis is a static implementation, i.e., it does not generalize to new data instances, and there is no training data. For clusterers that do generalize, see [`KMeans`](@ref) or [`KMedoids`](@ref).\n\nIn MLJ or MLJBase, create a machine with\n\n```\nmach = machine(model)\n```\n\n# Hyper-parameters\n\n * `radius=1.0`: query radius.\n * `leafsize=20`: number of points binned in each leaf node of the nearest neighbor k-d tree.\n * `min_neighbors=1`: minimum number of a core point neighbors.\n * `min_cluster_size=1`: minimum number of points in a valid cluster.\n\n# Operations\n\n * `predict(mach, X)`: return cluster label assignments, as an unordered `CategoricalVector`. Here `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Note that points of type `noise` will always get a label of `0`.\n\n# Report\n\nAfter calling `predict(mach)`, the fields of `report(mach)` are:\n\n * `point_types`: A `CategoricalVector` with the DBSCAN point type classification, one element per row of `X`. Elements are either `'C'` (core), `'B'` (boundary), or `'N'` (noise).\n * `nclusters`: The number of clusters (excluding the noise \"cluster\")\n * `cluster_labels`: The unique list of cluster labels\n * `clusters`: A vector of `Clustering.DbscanCluster` objects from Clustering.jl, which have these fields:\n\n * `size`: number of points in a cluster (core + boundary)\n * `core_indices`: indices of points in the cluster core\n * `boundary_indices`: indices of points on the cluster boundary\n\n# Examples\n\n```julia\nusing MLJ\n\nX, labels = make_moons(400, noise=0.09, rng=1) # synthetic data with 2 clusters; X\ny = map(labels) do label\n label == 0 ? 
\"cookie\" : \"monster\"\nend;\ny = coerce(y, Multiclass);\n\nDBSCAN = @load DBSCAN pkg=Clustering\nmodel = DBSCAN(radius=0.13, min_cluster_size=5)\nmach = machine(model)\n\n# compute and output cluster assignments for observations in `X`:\nyhat = predict(mach, X)\n\n# get DBSCAN point types:\nreport(mach).point_types\nreport(mach).nclusters\n\n# compare cluster labels with actual labels:\ncompare = zip(yhat, y) |> collect;\ncompare[1:10] # clusters align with classes\n\n# visualize clusters, noise in red:\npoints = zip(X.x1, X.x2) |> collect\ncolors = map(yhat) do i\n i == 0 ? :red :\n i == 1 ? :blue :\n i == 2 ? :green :\n i == 3 ? :yellow :\n :black\nend\nusing Plots\nscatter(points, color=colors)\n```\n""" -":name" = "DBSCAN" -":human_name" = "DBSCAN clusterer (density-based spatial clustering of applications with noise)" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" -":implemented_methods" = [":clean!", ":predict"] -":hyperparameters" = "`(:radius, :leafsize, :min_neighbors, :min_cluster_size)`" -":hyperparameter_types" = "`(\"Real\", \"Int64\", \"Int64\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":fit", ":predict", ":update"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict,)`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" -[Clustering.KMeans] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Clustering" -":package_license" = "MIT" -":load_path" = "MLJClusteringInterface.KMeans" -":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" -":package_url" = "https://github.com/JuliaStats/Clustering.jl" +[MLJText.TfidfTransformer] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nKMeans\n```\n\nA model type for constructing a K-means clusterer, based on [Clustering.jl](https://github.com/JuliaStats/Clustering.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKMeans = @load KMeans pkg=Clustering\n```\n\nDo `model = KMeans()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `KMeans(k=...)`.\n\n[K-means](http://en.wikipedia.org/wiki/K_means) is a classical method for clustering or vector quantization. It produces a fixed number of clusters, each associated with a *center* (also known as a *prototype*), and each data point is assigned to a cluster with the nearest center.\n\nFrom a mathematical standpoint, K-means is a coordinate descent algorithm that solves the following optimization problem:\n\n$$\n\\text{minimize} \\ \\sum_{i=1}^n \\| \\mathbf{x}_i - \\boldsymbol{\\mu}_{z_i} \\|^2 \\ \\text{w.r.t.} \\ (\\boldsymbol{\\mu}, z)\n$$\n\nHere, $\\boldsymbol{\\mu}_k$ is the center of the $k$-th cluster, and $z_i$ is an index of the cluster for $i$-th point $\\mathbf{x}_i$.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=3`: The number of centroids to use in clustering.\n * `metric::SemiMetric=Distances.SqEuclidean`: The metric used to calculate the clustering. Must have type `PreMetric` from Distances.jl.\n * `init = :kmpp`: One of the following options to indicate how cluster seeds should be initialized:\n\n * `:kmpp`: KMeans++\n * `:kmenc`: K-medoids initialization based on centrality\n * `:rand`: random\n * an instance of `Clustering.SeedingAlgorithm` from Clustering.jl\n * an integer vector of length `k` that provides the indices of points to use as initial cluster centers.\n\n See [documentation of Clustering.jl](https://juliastats.org/Clustering.jl/stable/kmeans.html#Clustering.kmeans).\n\n# Operations\n\n * `predict(mach, Xnew)`: return cluster label assignments, given new features `Xnew` having the same Scitype as `X` above.\n * `transform(mach, Xnew)`: instead return the mean pairwise distances from new samples to the cluster centers.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `centers`: The coordinates of the cluster centers.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `assignments`: The cluster assignments of each point in the training data.\n * `cluster_labels`: The labels assigned to each cluster.\n\n# Examples\n\n```julia\nusing MLJ\nKMeans = @load KMeans pkg=Clustering\n\ntable = load_iris()\ny, X = unpack(table, ==(:target), rng=123)\nmodel = KMeans(k=3)\nmach = machine(model, X) |> fit!\n\nyhat = predict(mach, X)\n@assert yhat == report(mach).assignments\n\ncompare = zip(yhat, y) |> collect;\ncompare[1:8] # clusters align with classes\n\ncenter_dists = transform(mach, fitted_params(mach).centers')\n\n@assert center_dists[1][1] == 0.0\n@assert center_dists[2][2] == 0.0\n@assert center_dists[3][3] == 0.0\n```\n\nSee also [`KMedoids`](@ref)\n""" -":name" = "KMeans" -":human_name" = "K-means clusterer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:k, :metric, :init)`" -":hyperparameter_types" = "`(\"Int64\", \"Distances.SemiMetric\", \"Any\")`" +":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" 
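The point of the metadata being patched in this file is that MLJ can surface it as model traits without loading any model code. A hedged sketch of how a user might query the entries above once this registry update ships (the string form of `info` is assumed from MLJModels):

```julia
using MLJ

# look up the traits recorded in Metadata.toml for one of the entries above:
meta = info("EvoLinearRegressor", pkg="EvoLinear")
meta.is_pure_julia          # true, per the entry above
meta.iteration_parameter    # :nrounds

# the same metadata drives model search over the whole registry:
X, y = make_regression(100, 3)
models(matching(X, y))      # all registered models compatible with (X, y)
```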
+":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" +":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJText.TfidfTransformer" +":hyperparameters" = "`(:max_doc_freq, :min_doc_freq, :smooth_idf)`" +":is_pure_julia" = "`true`" +":human_name" = "TF-IFD transformer" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nTfidfTransformer\n```\n\nA model type for constructing a TF-IFD transformer, based on [MLJText.jl](https://github.com/JuliaAI/MLJText.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTfidfTransformer = @load TfidfTransformer pkg=MLJText\n```\n\nDo `model = TfidfTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TfidfTransformer(max_doc_freq=...)`.\n\nThe transformer converts a collection of documents, tokenized or pre-parsed as bags of words/ngrams, to a matrix of [TF-IDF scores](https://en.wikipedia.org/wiki/Tf–idf#Inverse_document_frequency_2). Here \"TF\" means term-frequency while \"IDF\" means inverse document frequency (defined below). The TF-IDF score is the product of the two. This is a common term weighting scheme in information retrieval, that has also found good use in document classification. The goal of using TF-IDF instead of the raw frequencies of occurrence of a token in a given document is to scale down the impact of tokens that occur very frequently in a given corpus and that are hence empirically less informative than features that occur in a small fraction of the training corpus.\n\nIn textbooks and implementations there is variation in the definition of IDF. Here two IDF definitions are available. The default, smoothed option provides the IDF for a term `t` as `log((1 + n)/(1 + df(t))) + 1`, where `n` is the total number of documents and `df(t)` the number of documents in which `t` appears. Setting `smooth_df = false` provides an IDF of `log(n/df(t)) + 1`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any vector whose elements are either tokenized documents or bags of words/ngrams. Specifically, each element is one of the following:\n\n * A vector of abstract strings (tokens), e.g., `[\"I\", \"like\", \"Sam\", \".\", \"Sam\", \"is\", \"nice\", \".\"]` (scitype `AbstractVector{Textual}`)\n * A dictionary of counts, indexed on abstract strings, e.g., `Dict(\"I\"=>1, \"Sam\"=>2, \"Sam is\"=>1)` (scitype `Multiset{Textual}}`)\n * A dictionary of counts, indexed on plain ngrams, e.g., `Dict((\"I\",)=>1, (\"Sam\",)=>2, (\"I\", \"Sam\")=>1)` (scitype `Multiset{<:NTuple{N,Textual} where N}`); here a *plain ngram* is a tuple of abstract strings.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `> max_doc_freq` documents will not be considered by the transformer. 
For example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the documents will be removed.\n * `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `< min_doc_freq` documents will not be considered by the transformer. A value of 0.01 means that only terms that appear in at least 1% of the documents will be included.\n * `smooth_idf=true`: Controls which definition of IDF to use (see above).\n\n# Operations\n\n * `transform(mach, Xnew)`: Based on the vocabulary and IDF learned in training, return the matrix of TF-IDF scores for `Xnew`, a vector of the same form as `X` above. The matrix has size `(n, p)`, where `n = length(Xnew)` and `p` the size of the vocabulary. Tokens/ngrams not appearing in the learned vocabulary are scored zero.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vocab`: A vector containing the strings used in the transformer's vocabulary.\n * `idf_vector`: The transformer's calculated IDF vector.\n\n# Examples\n\n`TfidfTransformer` accepts a variety of inputs. The example below transforms tokenized documents:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\nTfidfTransformer = @load TfidfTransformer pkg=MLJText\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ntfidf_transformer = TfidfTransformer()\n\njulia> tokenized_docs = TextAnalysis.tokenize.(docs)\n2-element Vector{Vector{String}}:\n [\"Hi\", \"my\", \"name\", \"is\", \"Sam\", \".\"]\n [\"How\", \"are\", \"you\", \"today\", \"?\"]\n\nmach = machine(tfidf_transformer, tokenized_docs)\nfit!(mach)\n\nfitted_params(mach)\n\ntfidf_mat = transform(mach, tokenized_docs)\n```\n\nAlternatively, one can provide documents pre-parsed as ngrams counts:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncorpus = TextAnalysis.Corpus(TextAnalysis.NGramDocument.(docs, 1, 2))\nngram_docs = TextAnalysis.ngrams.(corpus)\n\njulia> ngram_docs[1]\nDict{AbstractString, Int64} with 11 entries:\n \"is\" => 1\n \"my\" => 1\n \"name\" => 1\n \".\" => 1\n \"Hi\" => 1\n \"Sam\" => 1\n \"my name\" => 1\n \"Hi my\" => 1\n \"name is\" => 1\n \"Sam .\" => 1\n \"is Sam\" => 1\n\ntfidf_transformer = TfidfTransformer()\nmach = machine(tfidf_transformer, ngram_docs)\nMLJ.fit!(mach)\nfitted_params(mach)\n\ntfidf_mat = transform(mach, ngram_docs)\n```\n\nSee also [`CountTransformer`](@ref), [`BM25Transformer`](@ref)\n""" +":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":package_url" = "https://github.com/JuliaAI/MLJText.jl" +":package_name" = "MLJText" +":name" = "TfidfTransformer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fitted_params"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, 
AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" ":constructor" = "`nothing`" -[Clustering.AffinityPropagation] -":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Clustering" -":package_license" = "MIT" -":load_path" = "MLJClusteringInterface.AffinityPropagation" -":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" -":package_url" = "https://github.com/JuliaStats/Clustering.jl" +[MLJText.CountTransformer] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nAffinityPropagation\n```\n\nA model type for constructing a Affinity Propagation clusterer, based on [Clustering.jl](https://github.com/JuliaStats/Clustering.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nAffinityPropagation = @load AffinityPropagation pkg=Clustering\n```\n\nDo `model = AffinityPropagation()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `AffinityPropagation(damp=...)`.\n\n[Affinity Propagation](https://en.wikipedia.org/wiki/Affinity_propagation) is a clustering algorithm based on the concept of \"message passing\" between data points. More information is available at the [Clustering.jl documentation](https://juliastats.org/Clustering.jl/stable/index.html). Use `predict` to get cluster assignments. Indices of the exemplars, their values, etc, are accessed from the machine report (see below).\n\nThis is a static implementation, i.e., it does not generalize to new data instances, and there is no training data. For clusterers that do generalize, see [`KMeans`](@ref) or [`KMedoids`](@ref).\n\nIn MLJ or MLJBase, create a machine with\n\n```\nmach = machine(model)\n```\n\n# Hyper-parameters\n\n * `damp = 0.5`: damping factor\n * `maxiter = 200`: maximum number of iteration\n * `tol = 1e-6`: tolerance for converenge\n * `preference = nothing`: the (single float) value of the diagonal elements of the similarity matrix. If unspecified, choose median (negative) similarity of all pairs as mentioned [here](https://en.wikipedia.org/wiki/Affinity_propagation#Algorithm)\n * `metric = Distances.SqEuclidean()`: metric (see `Distances.jl` for available metrics)\n\n# Operations\n\n * `predict(mach, X)`: return cluster label assignments, as an unordered `CategoricalVector`. 
Here `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\n# Report\n\nAfter calling `predict(mach)`, the fields of `report(mach)` are:\n\n * exemplars: indices of the data picked as exemplars in `X`\n * centers: positions of the exemplars in the feature space\n * cluster_labels: labels of clusters given to each datum in `X`\n * iterations: the number of iteration run by the algorithm\n * converged: whether or not the algorithm converges by the maximum iteration\n\n# Examples\n\n```\nusing MLJ\n\nX, labels = make_moons(400, noise=0.9, rng=1)\n\nAffinityPropagation = @load AffinityPropagation pkg=Clustering\nmodel = AffinityPropagation(preference=-10.0)\nmach = machine(model)\n\n# compute and output cluster assignments for observations in `X`:\nyhat = predict(mach, X)\n\n# Get the positions of the exemplars\nreport(mach).centers\n\n# Plot clustering result\nusing GLMakie\nscatter(MLJ.matrix(X)', color=yhat.refs)\n```\n""" -":name" = "AffinityPropagation" -":human_name" = "Affinity Propagation clusterer" -":is_supervised" = "`false`" +":hyperparameter_types" = "`(\"Float64\", \"Float64\")`" +":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" +":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" ":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" -":implemented_methods" = [":clean!", ":predict"] -":hyperparameters" = "`(:damp, :maxiter, :tol, :preference, :metric)`" -":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Float64\", \"Union{Nothing, Float64}\", \"Distances.SemiMetric\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":load_path" = "MLJText.CountTransformer" +":hyperparameters" = "`(:max_doc_freq, :min_doc_freq)`" +":is_pure_julia" = "`true`" +":human_name" = "count transformer" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nCountTransformer\n```\n\nA model type for constructing a count transformer, based on [MLJText.jl](https://github.com/JuliaAI/MLJText.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCountTransformer = @load CountTransformer pkg=MLJText\n```\n\nDo `model = CountTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CountTransformer(max_doc_freq=...)`.\n\nThe transformer converts a collection of documents, tokenized or pre-parsed as bags of words/ngrams, to a matrix of term counts.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any vector whose elements are either tokenized documents or bags of words/ngrams. 
Specifically, each element is one of the following:\n\n * A vector of abstract strings (tokens), e.g., `[\"I\", \"like\", \"Sam\", \".\", \"Sam\", \"is\", \"nice\", \".\"]` (scitype `AbstractVector{Textual}`)\n * A dictionary of counts, indexed on abstract strings, e.g., `Dict(\"I\"=>1, \"Sam\"=>2, \"Sam is\"=>1)` (scitype `Multiset{Textual}`)\n * A dictionary of counts, indexed on plain ngrams, e.g., `Dict((\"I\",)=>1, (\"Sam\",)=>2, (\"I\", \"Sam\")=>1)` (scitype `Multiset{<:NTuple{N,Textual} where N}`); here a *plain ngram* is a tuple of abstract strings.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `> max_doc_freq` documents will not be considered by the transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the documents will be removed.\n * `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `< min_doc_freq` documents will not be considered by the transformer. A value of 0.01 means that only terms that appear in at least 1% of the documents will be included.\n\n# Operations\n\n * `transform(mach, Xnew)`: Based on the vocabulary learned in training, return the matrix of counts for `Xnew`, a vector of the same form as `X` above. The matrix has size `(n, p)`, where `n = length(Xnew)` and `p` the size of the vocabulary. Tokens/ngrams not appearing in the learned vocabulary are scored zero.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vocab`: A vector containing the strings used in the transformer's vocabulary.\n\n# Examples\n\n`CountTransformer` accepts a variety of inputs. The example below transforms tokenized documents:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\nCountTransformer = @load CountTransformer pkg=MLJText\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncount_transformer = CountTransformer()\n\njulia> tokenized_docs = TextAnalysis.tokenize.(docs)\n2-element Vector{Vector{String}}:\n [\"Hi\", \"my\", \"name\", \"is\", \"Sam\", \".\"]\n [\"How\", \"are\", \"you\", \"today\", \"?\"]\n\nmach = machine(count_transformer, tokenized_docs)\nfit!(mach)\n\nfitted_params(mach)\n\ncount_mat = transform(mach, tokenized_docs)\n```\n\nAlternatively, one can provide documents pre-parsed as ngrams counts:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncorpus = TextAnalysis.Corpus(TextAnalysis.NGramDocument.(docs, 1, 2))\nngram_docs = TextAnalysis.ngrams.(corpus)\n\njulia> ngram_docs[1]\nDict{AbstractString, Int64} with 11 entries:\n \"is\" => 1\n \"my\" => 1\n \"name\" => 1\n \".\" => 1\n \"Hi\" => 1\n \"Sam\" => 1\n \"my name\" => 1\n \"Hi my\" => 1\n \"name is\" => 1\n \"Sam .\" => 1\n \"is Sam\" => 1\n\ncount_transformer = CountTransformer()\nmach = machine(count_transformer, ngram_docs)\nMLJ.fit!(mach)\nfitted_params(mach)\n\ncount_mat = transform(mach, ngram_docs)\n```\n\nSee also [`TfidfTransformer`](@ref), [`BM25Transformer`](@ref)\n""" +":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":package_url" = "https://github.com/JuliaAI/MLJText.jl" +":package_name" = "MLJText" +":name" = "CountTransformer" +":target_in_fit" = 
"`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fitted_params"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`(:predict,)`" +":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" +":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" ":constructor" = "`nothing`" -[Clustering.KMedoids] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "Clustering" -":package_license" = "MIT" -":load_path" = "MLJClusteringInterface.KMedoids" -":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" -":package_url" = "https://github.com/JuliaStats/Clustering.jl" +[MLJText.BM25Transformer] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nKMedoids\n```\n\nA model type for constructing a K-medoids clusterer, based on [Clustering.jl](https://github.com/JuliaStats/Clustering.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKMedoids = @load KMedoids pkg=Clustering\n```\n\nDo `model = KMedoids()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KMedoids(k=...)`.\n\n[K-medoids](http://en.wikipedia.org/wiki/K-medoids) is a clustering algorithm that works by finding $k$ data points (called *medoids*) such that the total distance between each data point and the closest *medoid* is minimal.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=3`: The number of centroids to use in clustering.\n * `metric::SemiMetric=Distances.SqEuclidean`: The metric used to calculate the clustering. 
Must have type `PreMetric` from Distances.jl.\n * `init` (defaults to `:kmpp`): how medoids should be initialized, could be one of the following:\n\n * `:kmpp`: KMeans++\n * `:kmenc`: K-medoids initialization based on centrality\n * `:rand`: random\n * an instance of `Clustering.SeedingAlgorithm` from Clustering.jl\n * an integer vector of length `k` that provides the indices of points to use as initial medoids.\n\n See [documentation of Clustering.jl](https://juliastats.org/Clustering.jl/stable/kmedoids.html#Clustering.kmedoids).\n\n# Operations\n\n * `predict(mach, Xnew)`: return cluster label assignments, given new features `Xnew` having the same Scitype as `X` above.\n * `transform(mach, Xnew)`: instead return the mean pairwise distances from new samples to the cluster centers.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `medoids`: The coordinates of the cluster medoids.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `assignments`: The cluster assignments of each point in the training data.\n * `cluster_labels`: The labels assigned to each cluster.\n\n# Examples\n\n```julia\nusing MLJ\nKMedoids = @load KMedoids pkg=Clustering\n\ntable = load_iris()\ny, X = unpack(table, ==(:target), rng=123)\nmodel = KMedoids(k=3)\nmach = machine(model, X) |> fit!\n\nyhat = predict(mach, X)\n@assert yhat == report(mach).assignments\n\ncompare = zip(yhat, y) |> collect;\ncompare[1:8] # clusters align with classes\n\ncenter_dists = transform(mach, fitted_params(mach).medoids')\n\n@assert center_dists[1][1] == 0.0\n@assert center_dists[2][2] == 0.0\n@assert center_dists[3][3] == 0.0\n```\n\nSee also [`KMeans`](@ref)\n""" -":name" = "KMedoids" -":human_name" = "K-medoids clusterer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" +":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\")`" +":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" +":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" +":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] -":hyperparameters" = "`(:k, :metric, :init)`" -":hyperparameter_types" = "`(\"Int64\", \"Distances.SemiMetric\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJText.BM25Transformer" +":hyperparameters" = "`(:max_doc_freq, :min_doc_freq, :κ, :β, :smooth_idf)`" +":is_pure_julia" = "`true`" +":human_name" = "b m25 transformer" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nBM25Transformer\n```\n\nA model type for constructing a b m25 transformer, based on [MLJText.jl](https://github.com/JuliaAI/MLJText.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBM25Transformer = @load BM25Transformer pkg=MLJText\n```\n\nDo `model = BM25Transformer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `BM25Transformer(max_doc_freq=...)`.\n\nThe transformer converts a collection of documents, tokenized or pre-parsed as bags of words/ngrams, to a matrix of [Okapi BM25 document-word statistics](https://en.wikipedia.org/wiki/Okapi_BM25). The BM25 scoring function uses both term frequency (TF) and inverse document frequency (IDF, defined below), as in [`TfidfTransformer`](@ref), but additionally adjusts for the probability that a user will consider a search result relevant, based on the terms in the search query and those in each document.\n\nIn textbooks and implementations there is variation in the definition of IDF. Here two IDF definitions are available. The default, smoothed option provides the IDF for a term `t` as `log((1 + n)/(1 + df(t))) + 1`, where `n` is the total number of documents and `df(t)` the number of documents in which `t` appears. Setting `smooth_idf = false` provides an IDF of `log(n/df(t)) + 1`.\n\nReferences:\n\n * http://ethen8181.github.io/machine-learning/search/bm25_intro.html\n * https://en.wikipedia.org/wiki/Okapi_BM25\n * https://nlp.stanford.edu/IR-book/html/htmledition/okapi-bm25-a-non-binary-model-1.html\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any vector whose elements are either tokenized documents or bags of words/ngrams. Specifically, each element is one of the following:\n\n * A vector of abstract strings (tokens), e.g., `[\"I\", \"like\", \"Sam\", \".\", \"Sam\", \"is\", \"nice\", \".\"]` (scitype `AbstractVector{Textual}`)\n * A dictionary of counts, indexed on abstract strings, e.g., `Dict(\"I\"=>1, \"Sam\"=>2, \"Sam is\"=>1)` (scitype `Multiset{Textual}`)\n * A dictionary of counts, indexed on plain ngrams, e.g., `Dict((\"I\",)=>1, (\"Sam\",)=>2, (\"I\", \"Sam\")=>1)` (scitype `Multiset{<:NTuple{N,Textual} where N}`); here a *plain ngram* is a tuple of abstract strings.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `> max_doc_freq` documents will not be considered by the transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the documents will be removed.\n * `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `< min_doc_freq` documents will not be considered by the transformer. A value of 0.01 means that only terms that are in at least 1% of the documents will be included.\n * `κ=2`: The term frequency saturation characteristic. Higher values represent slower saturation. What we mean by saturation is the degree to which a term occurring extra times adds to the overall score.\n * `β=0.75`: Amplifies the particular document length compared to the average length. The bigger β is, the more document length is amplified in terms of the overall score. The default value is 0.75, and the bounds are restricted between 0 and 1.\n * `smooth_idf=true`: Controls which definition of IDF to use (see above).\n\n# Operations\n\n * `transform(mach, Xnew)`: Based on the vocabulary, IDF, and mean word counts learned in training, return the matrix of BM25 scores for `Xnew`, a vector of the same form as `X` above. The matrix has size `(n, p)`, where `n = length(Xnew)` and `p` the size of the vocabulary. Tokens/ngrams not appearing in the learned vocabulary are scored zero.
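For a quick sanity check, both IDF conventions can be evaluated by hand. A minimal sketch with illustrative values (`n` and `df_t` are assumptions for this sketch, not MLJText internals):

```julia
# toy corpus summary: 10 documents, with the term appearing in 3 of them
n, df_t = 10, 3

idf_smooth   = log((1 + n) / (1 + df_t)) + 1  # default (smooth_idf = true)
idf_unsmooth = log(n / df_t) + 1              # smooth_idf = false
```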
\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vocab`: A vector containing the strings used in the transformer's vocabulary.\n * `idf_vector`: The transformer's calculated IDF vector.\n * `mean_words_in_docs`: The mean number of words in each document.\n\n# Examples\n\n`BM25Transformer` accepts a variety of inputs. The example below transforms tokenized documents:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\nBM25Transformer = @load BM25Transformer pkg=MLJText\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\nbm25_transformer = BM25Transformer()\n\njulia> tokenized_docs = TextAnalysis.tokenize.(docs)\n2-element Vector{Vector{String}}:\n [\"Hi\", \"my\", \"name\", \"is\", \"Sam\", \".\"]\n [\"How\", \"are\", \"you\", \"today\", \"?\"]\n\nmach = machine(bm25_transformer, tokenized_docs)\nfit!(mach)\n\nfitted_params(mach)\n\nbm25_mat = transform(mach, tokenized_docs)\n```\n\nAlternatively, one can provide documents pre-parsed as ngrams counts:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncorpus = TextAnalysis.Corpus(TextAnalysis.NGramDocument.(docs, 1, 2))\nngram_docs = TextAnalysis.ngrams.(corpus)\n\njulia> ngram_docs[1]\nDict{AbstractString, Int64} with 11 entries:\n \"is\" => 1\n \"my\" => 1\n \"name\" => 1\n \".\" => 1\n \"Hi\" => 1\n \"Sam\" => 1\n \"my name\" => 1\n \"Hi my\" => 1\n \"name is\" => 1\n \"Sam .\" => 1\n \"is Sam\" => 1\n\nbm25_transformer = BM25Transformer()\nmach = machine(bm25_transformer, ngram_docs)\nMLJ.fit!(mach)\nfitted_params(mach)\n\nbm25_mat = transform(mach, ngram_docs)\n```\n\nSee also [`TfidfTransformer`](@ref), [`CountTransformer`](@ref)\n"""
+":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`"
+":package_url" = "https://github.com/JuliaAI/MLJText.jl"
+":package_name" = "MLJText"
+":name" = "BM25Transformer"
+":target_in_fit" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":implemented_methods" = [":fitted_params"]
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
":reports_feature_importances" = "`false`"
+":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`"
+":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`"
":constructor" = "`nothing`"

-[EvoLinear.EvoSplineRegressor]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`"
+[LightGBM.LGBMClassifier]
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"String\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", 
\"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Vector{Int64}\", \"String\", \"Float64\", \"Vector{Float64}\", \"String\", \"Float64\", \"Float64\", \"Float64\", \"Vector{Float64}\", \"Vector{Float64}\", \"Float64\", \"Vector{Vector{Int64}}\", \"Int64\", \"Bool\", \"Int64\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"String\", \"String\", \"String\", \"Vector{Int64}\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Float64\", \"Bool\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Vector{Float64}\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Bool\")`" +":package_uuid" = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT Expat" +":prediction_type" = ":probabilistic" +":load_path" = "LightGBM.MLJInterface.LGBMClassifier" +":hyperparameters" = "`(:objective, :boosting, :num_iterations, :learning_rate, 
:num_leaves, :tree_learner, :num_threads, :device_type, :seed, :deterministic, :force_col_wise, :force_row_wise, :histogram_pool_size, :max_depth, :min_data_in_leaf, :min_sum_hessian_in_leaf, :bagging_fraction, :pos_bagging_fraction, :neg_bagging_fraction, :bagging_freq, :bagging_seed, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :extra_trees, :extra_seed, :early_stopping_round, :first_metric_only, :max_delta_step, :lambda_l1, :lambda_l2, :linear_lambda, :min_gain_to_split, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :max_cat_to_onehot, :top_k, :monotone_constraints, :monotone_constraints_method, :monotone_penalty, :feature_contri, :forcedsplits_filename, :refit_decay_rate, :cegb_tradeoff, :cegb_penalty_split, :cegb_penalty_feature_lazy, :cegb_penalty_feature_coupled, :path_smooth, :interaction_constraints, :verbosity, :linear_tree, :max_bin, :max_bin_by_feature, :min_data_in_bin, :bin_construct_sample_cnt, :data_random_seed, :is_enable_sparse, :enable_bundle, :use_missing, :zero_as_missing, :feature_pre_filter, :pre_partition, :two_round, :header, :label_column, :weight_column, :ignore_column, :categorical_feature, :forcedbins_filename, :precise_float_parser, :start_iteration_predict, :num_iteration_predict, :predict_raw_score, :predict_leaf_index, :predict_contrib, :predict_disable_shape_check, :pred_early_stop, :pred_early_stop_freq, :pred_early_stop_margin, :is_unbalance, :scale_pos_weight, :sigmoid, :boost_from_average, :metric, :metric_freq, :is_provide_training_metric, :eval_at, :multi_error_top_k, :auc_mu_weights, :num_machines, :local_listen_port, :time_out, :machine_list_filename, :machines, :gpu_platform_id, :gpu_device_id, :gpu_use_dp, :num_gpu, :truncate_booster)`"
+":is_pure_julia" = "`false`"
+":human_name" = "LightGBM classifier"
+":is_supervised" = "`true`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nLGBMClassifier\n```\n\nA model type for constructing a LightGBM classifier, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM\n```\n\nDo `model = LGBMClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMClassifier(objective=...)`.\n\nLightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of classification tasks.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * `y` is a vector of targets whose items are of scitype `Finite` (`Multiclass` or `OrderedFactor`); check the scitype with `scitype(y)`.
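Because the target must have `Finite` element scitype, raw data often needs coercing before it is bound to a machine. A minimal sketch on toy data (variable names here are illustrative, not part of LightGBM.jl):

```julia
using MLJ

# toy data: two Continuous features and a two-class target
X = (x1 = rand(100), x2 = rand(100))
y = coerce(rand(["a", "b"], 100), Multiclass)  # raw strings -> Finite scitype

schema(X)   # confirm the feature columns are Continuous
scitype(y)  # AbstractVector{Multiclass{2}}
```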
\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, containing a `LGBMClassification` object, a `CategoricalArray` of the input class names, and the classifier with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `NamedTuple` containing:\n\n * `gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM \n\nX, y = @load_iris \nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMClassifier() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMClassification`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":package_url" = "https://github.com/IQVIA-ML/LightGBM.jl"
+":package_name" = "LightGBM"
+":name" = "LGBMClassifier"
":target_in_fit" = "`true`"
-":is_pure_julia" = "`true`"
-":package_name" = "EvoLinear"
-":package_license" = "MIT"
-":load_path" = "EvoLinear.EvoSplineRegressor"
-":package_uuid" = "ab853011-1780-437f-b4b5-5de6f4777246"
-":package_url" = "https://github.com/jeremiedb/EvoLinear.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = """```\nEvoSplineRegressor(; kwargs...)\n```\n\nA model type for constructing a EvoSplineRegressor, based on [EvoLinear.jl](https://github.com/jeremiedb/EvoLinear.jl), and implementing both an internal API and the MLJ model interface.\n\n# Keyword arguments\n\n * `loss=:mse`: loss function to be minimised. Can be one of:\n\n * `:mse`\n * `:logistic`\n * `:poisson`\n * `:gamma`\n * `:tweedie`\n * `nrounds=10`: maximum number of training rounds.\n * `eta=1`: Learning rate. Typically in the range `[1e-2, 1]`.\n * `L1=0`: Regularization penalty applied by shrinking to 0 weight update if update is < L1. No penalty if update > L1. Results in sparse feature selection. Typically in the `[0, 1]` range on normalized features.\n * `L2=0`: Regularization penalty applied to the squared of the weight update value. Restricts large parameter values. Typically in the `[0, 1]` range on normalized features.\n * `rng=123`: random seed. Not used at the moment.\n * `updater=:all`: training method. Only `:all` is supported at the moment. Gradients for each feature are computed simultaneously, then bias is updated based on all features update.\n * `device=:cpu`: Only `:cpu` is supported at the moment.\n\n# Internal API\n\nDo `config = EvoSplineRegressor()` to construct an hyper-parameter struct with default hyper-parameters. 
Provide keyword arguments as listed above to override defaults, for example:\n\n```julia\nEvoSplineRegressor(loss=:logistic, L1=1e-3, L2=1e-2, nrounds=100)\n```\n\n## Training model\n\nA model is built using [`fit`](@ref):\n\n```julia\nconfig = EvoSplineRegressor()\nm = fit(config; x, y, w)\n```\n\n## Inference\n\nFitted results is an `EvoLinearModel` which acts as a prediction function when passed a features matrix as argument. \n\n```julia\npreds = m(x)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoSplineRegressor = @load EvoSplineRegressor pkg=EvoLinear\n```\n\nDo `model = EvoLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoSplineRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where: \n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given\n\nfeatures `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: the `SplineModel` object returned by EvoSplineRegressor fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:coef`: Vector of coefficients (βs) associated to each of the features.\n * `:bias`: Value of the bias.\n * `:names`: Names of each of the features.\n""" -":name" = "EvoSplineRegressor" -":human_name" = "evo spline regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":fit", ":predict", ":update"] -":hyperparameters" = "`(:nrounds, :opt, :batchsize, :act, :eta, :L2, :knots, :rng, :device)`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Symbol\", \"Any\", \"Any\", \"Union{Nothing, Dict}\", \"Any\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":nrounds" +":implemented_methods" = [":clean!", ":fit", ":predict", ":update"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" -[EvoLinear.EvoLinearRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +[LightGBM.LGBMRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = 
"`(\"String\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Vector{Int64}\", \"String\", \"Float64\", \"Vector{Float64}\", \"String\", \"Float64\", \"Float64\", \"Float64\", \"Vector{Float64}\", \"Vector{Float64}\", \"Float64\", \"Vector{Vector{Int64}}\", \"Int64\", \"Bool\", \"Int64\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"String\", \"String\", \"String\", \"Vector{Int64}\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Bool\")`" +":package_uuid" = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT Expat" +":prediction_type" = ":deterministic" +":load_path" = "LightGBM.MLJInterface.LGBMRegressor" +":hyperparameters" = "`(:objective, 
:boosting, :num_iterations, :learning_rate, :num_leaves, :tree_learner, :num_threads, :device_type, :seed, :deterministic, :force_col_wise, :force_row_wise, :histogram_pool_size, :max_depth, :min_data_in_leaf, :min_sum_hessian_in_leaf, :bagging_fraction, :bagging_freq, :bagging_seed, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :extra_trees, :extra_seed, :early_stopping_round, :first_metric_only, :max_delta_step, :lambda_l1, :lambda_l2, :linear_lambda, :min_gain_to_split, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :max_cat_to_onehot, :top_k, :monotone_constraints, :monotone_constraints_method, :monotone_penalty, :feature_contri, :forcedsplits_filename, :refit_decay_rate, :cegb_tradeoff, :cegb_penalty_split, :cegb_penalty_feature_lazy, :cegb_penalty_feature_coupled, :path_smooth, :interaction_constraints, :verbosity, :linear_tree, :max_bin, :max_bin_by_feature, :min_data_in_bin, :bin_construct_sample_cnt, :data_random_seed, :is_enable_sparse, :enable_bundle, :use_missing, :zero_as_missing, :feature_pre_filter, :pre_partition, :two_round, :header, :label_column, :weight_column, :ignore_column, :categorical_feature, :forcedbins_filename, :precise_float_parser, :start_iteration_predict, :num_iteration_predict, :predict_raw_score, :predict_leaf_index, :predict_contrib, :predict_disable_shape_check, :is_unbalance, :boost_from_average, :reg_sqrt, :alpha, :fair_c, :poisson_max_delta_step, :tweedie_variance_power, :metric, :metric_freq, :is_provide_training_metric, :eval_at, :num_machines, :local_listen_port, :time_out, :machine_list_filename, :machines, :gpu_platform_id, :gpu_device_id, :gpu_use_dp, :num_gpu, :truncate_booster)`"
+":is_pure_julia" = "`false`"
+":human_name" = "LightGBM regressor"
+":is_supervised" = "`true`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nLGBMRegressor\n```\n\nA model type for constructing a LightGBM regressor, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM\n```\n\nDo `model = LGBMRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMRegressor(objective=...)`.\n\nLightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification, regression and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of regression tasks.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * `y` is a vector of targets whose items are of scitype `Continuous`; check the scitype with `scitype(y)`.
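Before a final fit, out-of-sample performance can be estimated with MLJ's `evaluate`. A minimal sketch on toy data (assumes LightGBM.jl is installed; the data are illustrative):

```julia
using MLJ

LGBMRegressor = @load LGBMRegressor pkg=LightGBM

# toy regression problem with one informative feature
X = (x1 = rand(200), x2 = rand(200))
y = 2 .* X.x1 .+ 0.1 .* randn(200)

evaluate(LGBMRegressor(), X, y, resampling=CV(nfolds=5), measure=rms)
```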
\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, containing a `LGBMRegression` object, an empty vector, and the regressor with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `NamedTuple` containing:\n\n * `gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM \n\nX, y = @load_boston # a table and a vector \nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMRegressor() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMRegression`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":package_url" = "https://github.com/IQVIA-ML/LightGBM.jl"
+":package_name" = "LightGBM"
+":name" = "LGBMRegressor"
":target_in_fit" = "`true`"
-":is_pure_julia" = "`true`"
-":package_name" = "EvoLinear"
-":package_license" = "MIT"
-":load_path" = "EvoLinear.EvoLinearRegressor"
-":package_uuid" = "ab853011-1780-437f-b4b5-5de6f4777246"
-":package_url" = "https://github.com/jeremiedb/EvoLinear.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = """```\nEvoLinearRegressor(; kwargs...)\n```\n\nA model type for constructing a EvoLinearRegressor, based on [EvoLinear.jl](https://github.com/jeremiedb/EvoLinear.jl), and implementing both an internal API and the MLJ model interface.\n\n# Keyword arguments\n\n * `loss=:mse`: loss function to be minimised. Can be one of:\n\n * `:mse`\n * `:logistic`\n * `:poisson`\n * `:gamma`\n * `:tweedie`\n * `nrounds=10`: maximum number of training rounds.\n * `eta=1`: Learning rate. Typically in the range `[1e-2, 1]`.\n * `L1=0`: Regularization penalty applied by shrinking to 0 weight update if update is < L1. No penalty if update > L1. Results in sparse feature selection. Typically in the `[0, 1]` range on normalized features.\n * `L2=0`: Regularization penalty applied to the squared of the weight update value. Restricts large parameter values. Typically in the `[0, 1]` range on normalized features.\n * `rng=123`: random seed. Not used at the moment.\n * `updater=:all`: training method. Only `:all` is supported at the moment. Gradients for each feature are computed simultaneously, then bias is updated based on all features update.\n * `device=:cpu`: Only `:cpu` is supported at the moment.\n\n# Internal API\n\nDo `config = EvoLinearRegressor()` to construct an hyper-parameter struct with default hyper-parameters. 
Provide keyword arguments as listed above to override defaults, for example:\n\n```julia\nEvoLinearRegressor(loss=:logistic, L1=1e-3, L2=1e-2, nrounds=100)\n```\n\n## Training model\n\nA model is built using [`fit`](@ref):\n\n```julia\nconfig = EvoLinearRegressor()\nm = fit(config; x, y, w)\n```\n\n## Inference\n\nFitted results is an `EvoLinearModel` which acts as a prediction function when passed a features matrix as argument. \n\n```julia\npreds = m(x)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoLinearRegressor = @load EvoLinearRegressor pkg=EvoLinear\n```\n\nDo `model = EvoLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoLinearRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where: \n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given\n\nfeatures `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: the `EvoLinearModel` object returned by EvoLnear.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:coef`: Vector of coefficients (βs) associated to each of the features.\n * `:bias`: Value of the bias.\n * `:names`: Names of each of the features.\n""" -":name" = "EvoLinearRegressor" -":human_name" = "evo linear regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":fit", ":predict", ":update"] -":hyperparameters" = "`(:updater, :nrounds, :eta, :L1, :L2, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Any\", \"Any\", \"Any\", \"Any\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":nrounds" +":implemented_methods" = [":clean!", ":fit", ":predict", ":update"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" -[MLJText.TfidfTransformer] -":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" -":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = 
"`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" -":target_in_fit" = "`false`" +[LaplaceRedux.LaplaceClassifier] +":is_wrapper" = "`true`" +":hyperparameter_types" = "`(\"Union{Nothing, Flux.Chain}\", \"Any\", \"Any\", \"Integer\", \"Integer\", \"Symbol\", \"Any\", \"Union{String, Symbol, LaplaceRedux.HessianStructure}\", \"Symbol\", \"Float64\", \"Float64\", \"Union{Nothing, LinearAlgebra.UniformScaling, AbstractMatrix}\", \"Int64\", \"Symbol\")`" +":package_uuid" = "c52c1a26-f7c5-402b-80be-ba1e638ad478" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractArray{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl/blob/main/LICENSE" +":prediction_type" = ":probabilistic" +":load_path" = "LaplaceRedux.LaplaceClassifier" +":hyperparameters" = "`(:model, :flux_loss, :optimiser, :epochs, :batch_size, :subset_of_weights, :subnetwork_indices, :hessian_structure, :backend, :observational_noise, :prior_mean, :prior_precision_matrix, :fit_prior_nsteps, :link_approx)`" ":is_pure_julia" = "`true`" -":package_name" = "MLJText" -":package_license" = "MIT" -":load_path" = "MLJText.TfidfTransformer" -":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" -":package_url" = "https://github.com/JuliaAI/MLJText.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":human_name" = "laplace classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLaplaceClassifier\n```\n\nA model type for constructing a laplace classifier, based on [LaplaceRedux.jl](https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLaplaceClassifier = @load LaplaceClassifier pkg=LaplaceRedux\n```\n\nDo `model = LaplaceClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LaplaceClassifier(model=...)`.\n\n`LaplaceClassifier` implements the [Laplace Redux – Effortless Bayesian Deep Learning](https://proceedings.neurips.cc/paper/2021/hash/a3923dbe2f702eff254d67b48ae2f06e-Abstract.html), originally published in Daxberger, E., Kristiadi, A., Immer, A., Eschenhagen, R., Bauer, M., Hennig, P. 
(2021): \"Laplace Redux – Effortless Bayesian Deep Learning.\", NIPS'21: Proceedings of the 35th International Conference on Neural Information Processing Systems*, Article No. 1537, pp. 20089–20103 for classification models.\n\n# Training data\n\nIn MLJ or MLJBase, given a dataset X,y and a `Flux_Chain` adapted to the dataset, pass the chain to the model\n\n```julia\nlaplace_model = LaplaceClassifier(model = Flux_Chain,kwargs...)\n```\n\nthen bind an instance `laplace_model` to data with\n\n```\nmach = machine(laplace_model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters (format: name-type-default value-restrictions)\n\n * `model::Union{Flux.Chain,Nothing} = nothing`: Either nothing or a Flux model provided by the user and compatible with the dataset. In the former case, LaplaceRedux will use a standard MLP with 2 hidden layers with 20 neurons each.\n * `flux_loss = Flux.Losses.logitcrossentropy` : a Flux loss function\n * `optimiser = Adam()` a Flux optimiser\n * `epochs::Integer = 1000::(_ > 0)`: the number of training epochs.\n * `batch_size::Integer = 32::(_ > 0)`: the batch size.\n * `subset_of_weights::Symbol = :all::(_ in (:all, :last_layer, :subnetwork))`: the subset of weights to use, either `:all`, `:last_layer`, or `:subnetwork`.\n * `subnetwork_indices = nothing`: the indices of the subnetworks.\n * `hessian_structure::Union{HessianStructure,Symbol,String} = :full::(_ in (:full, :diagonal))`: the structure of the Hessian matrix, either `:full` or `:diagonal`.\n * `backend::Symbol = :GGN::(_ in (:GGN, :EmpiricalFisher))`: the backend to use, either `:GGN` or `:EmpiricalFisher`.\n * `observational_noise (alias σ)::Float64 = 1.0`: the standard deviation of the prior distribution.\n * `prior_mean (alias μ₀)::Float64 = 0.0`: the mean of the prior distribution.\n * `prior_precision_matrix (alias P₀)::Union{AbstractMatrix,UniformScaling,Nothing} = nothing`: the covariance matrix of the prior distribution.\n * `fit_prior_nsteps::Int = 100::(_ > 0)`: the number of steps used to fit the priors.\n * `link_approx::Symbol = :probit::(_ in (:probit, :plugin))`: the approximation to adopt to compute the probabilities.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `mean`: The mean of the posterior distribution.\n * `H`: The Hessian of the posterior distribution.\n * `P`: The precision matrix of the posterior distribution.\n * `cov_matrix`: The covariance matrix of the posterior distribution.\n * `n_data`: The number of data points.\n * `n_params`: The number of parameters.\n * `n_out`: The number of outputs.\n * `loss`: The loss value of the posterior distribution.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `loss_history`: an array containing the total loss per epoch.\n\n# Accessor functions\n\n * `training_losses(mach)`: return the loss history from report\n\n# Examples\n\n```\nusing MLJ\nLaplaceClassifier = @load LaplaceClassifier pkg=LaplaceRedux\n\nX, y = @load_iris\n\n# Define the Flux Chain model\nusing Flux\nmodel = Chain(\n Dense(4, 10, relu),\n Dense(10, 10, relu),\n Dense(10, 3)\n)\n\n# Define the LaplaceClassifier\nmodel = LaplaceClassifier(model=model)\n\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\ntraining_losses(mach) # loss history per epoch\npdf.(yhat, \"virginica\") # probabilities for the \"virginica\" class\nfitted_params(mach) # NamedTuple with the fitted params of Laplace\n\n```\n\nSee also [LaplaceRedux.jl](https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl).\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":package_url" = "https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl"
+":package_name" = "LaplaceRedux"
+":name" = "LaplaceClassifier"
+":target_in_fit" = "`true`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = """```\nTfidfTransformer\n```\n\nA model type for constructing a TF-IFD transformer, based on [MLJText.jl](https://github.com/JuliaAI/MLJText.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTfidfTransformer = @load TfidfTransformer pkg=MLJText\n```\n\nDo `model = TfidfTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TfidfTransformer(max_doc_freq=...)`.\n\nThe transformer converts a collection of documents, tokenized or pre-parsed as bags of words/ngrams, to a matrix of [TF-IDF scores](https://en.wikipedia.org/wiki/Tf–idf#Inverse_document_frequency_2). Here \"TF\" means term-frequency while \"IDF\" means inverse document frequency (defined below). The TF-IDF score is the product of the two. This is a common term weighting scheme in information retrieval, that has also found good use in document classification. The goal of using TF-IDF instead of the raw frequencies of occurrence of a token in a given document is to scale down the impact of tokens that occur very frequently in a given corpus and that are hence empirically less informative than features that occur in a small fraction of the training corpus.\n\nIn textbooks and implementations there is variation in the definition of IDF. Here two IDF definitions are available. 
The default, smoothed option provides the IDF for a term `t` as `log((1 + n)/(1 + df(t))) + 1`, where `n` is the total number of documents and `df(t)` the number of documents in which `t` appears. Setting `smooth_df = false` provides an IDF of `log(n/df(t)) + 1`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any vector whose elements are either tokenized documents or bags of words/ngrams. Specifically, each element is one of the following:\n\n * A vector of abstract strings (tokens), e.g., `[\"I\", \"like\", \"Sam\", \".\", \"Sam\", \"is\", \"nice\", \".\"]` (scitype `AbstractVector{Textual}`)\n * A dictionary of counts, indexed on abstract strings, e.g., `Dict(\"I\"=>1, \"Sam\"=>2, \"Sam is\"=>1)` (scitype `Multiset{Textual}}`)\n * A dictionary of counts, indexed on plain ngrams, e.g., `Dict((\"I\",)=>1, (\"Sam\",)=>2, (\"I\", \"Sam\")=>1)` (scitype `Multiset{<:NTuple{N,Textual} where N}`); here a *plain ngram* is a tuple of abstract strings.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `> max_doc_freq` documents will not be considered by the transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the documents will be removed.\n * `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `< max_doc_freq` documents will not be considered by the transformer. A value of 0.01 means that only terms that are at least in 1% of the documents will be included.\n * `smooth_idf=true`: Control which definition of IDF to use (see above).\n\n# Operations\n\n * `transform(mach, Xnew)`: Based on the vocabulary and IDF learned in training, return the matrix of TF-IDF scores for `Xnew`, a vector of the same form as `X` above. The matrix has size `(n, p)`, where `n = length(Xnew)` and `p` the size of the vocabulary. Tokens/ngrams not appearing in the learned vocabulary are scored zero.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vocab`: A vector containing the strings used in the transformer's vocabulary.\n * `idf_vector`: The transformer's calculated IDF vector.\n\n# Examples\n\n`TfidfTransformer` accepts a variety of inputs. 
The example below transforms tokenized documents:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\nTfidfTransformer = @load TfidfTransformer pkg=MLJText\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ntfidf_transformer = TfidfTransformer()\n\njulia> tokenized_docs = TextAnalysis.tokenize.(docs)\n2-element Vector{Vector{String}}:\n [\"Hi\", \"my\", \"name\", \"is\", \"Sam\", \".\"]\n [\"How\", \"are\", \"you\", \"today\", \"?\"]\n\nmach = machine(tfidf_transformer, tokenized_docs)\nfit!(mach)\n\nfitted_params(mach)\n\ntfidf_mat = transform(mach, tokenized_docs)\n```\n\nAlternatively, one can provide documents pre-parsed as ngrams counts:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncorpus = TextAnalysis.Corpus(TextAnalysis.NGramDocument.(docs, 1, 2))\nngram_docs = TextAnalysis.ngrams.(corpus)\n\njulia> ngram_docs[1]\nDict{AbstractString, Int64} with 11 entries:\n \"is\" => 1\n \"my\" => 1\n \"name\" => 1\n \".\" => 1\n \"Hi\" => 1\n \"Sam\" => 1\n \"my name\" => 1\n \"Hi my\" => 1\n \"name is\" => 1\n \"Sam .\" => 1\n \"is Sam\" => 1\n\ntfidf_transformer = TfidfTransformer()\nmach = machine(tfidf_transformer, ngram_docs)\nMLJ.fit!(mach)\nfitted_params(mach)\n\ntfidf_mat = transform(mach, ngram_docs)\n```\n\nSee also [`CountTransformer`](@ref), [`BM25Transformer`](@ref)\n""" -":name" = "TfidfTransformer" -":human_name" = "TF-IFD transformer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fitted_params"] -":hyperparameters" = "`(:max_doc_freq, :min_doc_freq, :smooth_idf)`" -":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":getproperty", ":setproperty!", ":clean!", ":fit", ":fitted_params", ":is_same_except", ":predict", ":reformat", ":selectrows", ":training_losses", ":update"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJText.CountTransformer] -":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" -":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJText" -":package_license" = "MIT" -":load_path" = "MLJText.CountTransformer" -":package_uuid" 
= "7876af07-990d-54b4-ab0e-23690620f79a" -":package_url" = "https://github.com/JuliaAI/MLJText.jl" -":is_wrapper" = "`false`" +":target_scitype" = "`AbstractArray{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`true`" ":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nCountTransformer\n```\n\nA model type for constructing a count transformer, based on [MLJText.jl](https://github.com/JuliaAI/MLJText.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCountTransformer = @load CountTransformer pkg=MLJText\n```\n\nDo `model = CountTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CountTransformer(max_doc_freq=...)`.\n\nThe transformer converts a collection of documents, tokenized or pre-parsed as bags of words/ngrams, to a matrix of term counts.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any vector whose elements are either tokenized documents or bags of words/ngrams. Specifically, each element is one of the following:\n\n * A vector of abstract strings (tokens), e.g., `[\"I\", \"like\", \"Sam\", \".\", \"Sam\", \"is\", \"nice\", \".\"]` (scitype `AbstractVector{Textual}`)\n * A dictionary of counts, indexed on abstract strings, e.g., `Dict(\"I\"=>1, \"Sam\"=>2, \"Sam is\"=>1)` (scitype `Multiset{Textual}}`)\n * A dictionary of counts, indexed on plain ngrams, e.g., `Dict((\"I\",)=>1, (\"Sam\",)=>2, (\"I\", \"Sam\")=>1)` (scitype `Multiset{<:NTuple{N,Textual} where N}`); here a *plain ngram* is a tuple of abstract strings.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `> max_doc_freq` documents will not be considered by the transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the documents will be removed.\n * `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `< max_doc_freq` documents will not be considered by the transformer. A value of 0.01 means that only terms that are at least in 1% of the documents will be included.\n\n# Operations\n\n * `transform(mach, Xnew)`: Based on the vocabulary learned in training, return the matrix of counts for `Xnew`, a vector of the same form as `X` above. The matrix has size `(n, p)`, where `n = length(Xnew)` and `p` the size of the vocabulary. Tokens/ngrams not appearing in the learned vocabulary are scored zero.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vocab`: A vector containing the string used in the transformer's vocabulary.\n\n# Examples\n\n`CountTransformer` accepts a variety of inputs. 
The example below transforms tokenized documents:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\nCountTransformer = @load CountTransformer pkg=MLJText\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncount_transformer = CountTransformer()\n\njulia> tokenized_docs = TextAnalysis.tokenize.(docs)\n2-element Vector{Vector{String}}:\n [\"Hi\", \"my\", \"name\", \"is\", \"Sam\", \".\"]\n [\"How\", \"are\", \"you\", \"today\", \"?\"]\n\nmach = machine(count_transformer, tokenized_docs)\nfit!(mach)\n\nfitted_params(mach)\n\ntfidf_mat = transform(mach, tokenized_docs)\n```\n\nAlternatively, one can provide documents pre-parsed as ngrams counts:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncorpus = TextAnalysis.Corpus(TextAnalysis.NGramDocument.(docs, 1, 2))\nngram_docs = TextAnalysis.ngrams.(corpus)\n\njulia> ngram_docs[1]\nDict{AbstractString, Int64} with 11 entries:\n \"is\" => 1\n \"my\" => 1\n \"name\" => 1\n \".\" => 1\n \"Hi\" => 1\n \"Sam\" => 1\n \"my name\" => 1\n \"Hi my\" => 1\n \"name is\" => 1\n \"Sam .\" => 1\n \"is Sam\" => 1\n\ncount_transformer = CountTransformer()\nmach = machine(count_transformer, ngram_docs)\nMLJ.fit!(mach)\nfitted_params(mach)\n\ntfidf_mat = transform(mach, ngram_docs)\n```\n\nSee also [`TfidfTransformer`](@ref), [`BM25Transformer`](@ref)\n""" -":name" = "CountTransformer" -":human_name" = "count transformer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fitted_params"] -":hyperparameters" = "`(:max_doc_freq, :min_doc_freq)`" -":hyperparameter_types" = "`(\"Float64\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJText.BM25Transformer] -":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" -":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:Tuple{Vararg{ScientificTypesBase.Textual, var\"_s1\"}} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" -":target_in_fit" = "`false`" +[LaplaceRedux.LaplaceRegressor] +":is_wrapper" = "`true`" 
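As an aside on how entries like the `LaplaceRedux.LaplaceRegressor` block that follows are consumed: MLJ surfaces these registry traits through its model-query interface. A minimal sketch, assuming a current MLJ installation in which this updated registry has shipped:

```julia
using MLJ

# Look up the traits recorded for the new registry entry:
meta = info("LaplaceRegressor", pkg="LaplaceRedux")
meta.is_supervised     # true
meta.prediction_type   # :probabilistic
meta.load_path         # "LaplaceRedux.LaplaceRegressor"
```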
+":hyperparameter_types" = "`(\"Union{Nothing, Flux.Chain}\", \"Any\", \"Any\", \"Integer\", \"Integer\", \"Symbol\", \"Any\", \"Union{String, Symbol, LaplaceRedux.HessianStructure}\", \"Symbol\", \"Float64\", \"Float64\", \"Union{Nothing, LinearAlgebra.UniformScaling, AbstractMatrix}\", \"Int64\")`" +":package_uuid" = "c52c1a26-f7c5-402b-80be-ba1e638ad478" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractArray{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl/blob/main/LICENSE" +":prediction_type" = ":probabilistic" +":load_path" = "LaplaceRedux.LaplaceRegressor" +":hyperparameters" = "`(:model, :flux_loss, :optimiser, :epochs, :batch_size, :subset_of_weights, :subnetwork_indices, :hessian_structure, :backend, :observational_noise, :prior_mean, :prior_precision_matrix, :fit_prior_nsteps)`" ":is_pure_julia" = "`true`" -":package_name" = "MLJText" -":package_license" = "MIT" -":load_path" = "MLJText.BM25Transformer" -":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" -":package_url" = "https://github.com/JuliaAI/MLJText.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":human_name" = "laplace regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLaplaceRegressor\n```\n\nA model type for constructing a laplace regressor, based on [LaplaceRedux.jl](https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLaplaceRegressor = @load LaplaceRegressor pkg=LaplaceRedux\n```\n\nDo `model = LaplaceRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LaplaceRegressor(model=...)`.\n\n`LaplaceRegressor` implements the [Laplace Redux – Effortless Bayesian Deep Learning](https://proceedings.neurips.cc/paper/2021/hash/a3923dbe2f702eff254d67b48ae2f06e-Abstract.html), originally published in Daxberger, E., Kristiadi, A., Immer, A., Eschenhagen, R., Bauer, M., Hennig, P. (2021): \"Laplace Redux – Effortless Bayesian Deep Learning.\", NIPS'21: Proceedings of the 35th International Conference on Neural Information Processing Systems*, Article No. 1537, pp. 
20089–20103 for regression models.\n\n# Training data\n\nIn MLJ or MLJBase, given a dataset `X`, `y` and a `Flux.Chain` adapted to the dataset, pass the chain to the model\n\n```julia\nlaplace_model = LaplaceRegressor(model = Flux_Chain, kwargs...)\n```\n\nthen bind an instance `laplace_model` to data with\n\n```\nmach = machine(laplace_model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters (format: name-type-default value-restrictions)\n\n * `model::Union{Flux.Chain,Nothing} = nothing`: Either nothing or a Flux model provided by the user and compatible with the dataset. In the former case, LaplaceRedux will use a standard MLP with 2 hidden layers with 20 neurons each.\n * `flux_loss = Flux.Losses.logitcrossentropy`: a Flux loss function\n * `optimiser = Adam()`: a Flux optimiser\n * `epochs::Integer = 1000::(_ > 0)`: the number of training epochs.\n * `batch_size::Integer = 32::(_ > 0)`: the batch size.\n * `subset_of_weights::Symbol = :all::(_ in (:all, :last_layer, :subnetwork))`: the subset of weights to use, either `:all`, `:last_layer`, or `:subnetwork`.\n * `subnetwork_indices = nothing`: the indices of the subnetworks.\n * `hessian_structure::Union{HessianStructure,Symbol,String} = :full::(_ in (:full, :diagonal))`: the structure of the Hessian matrix, either `:full` or `:diagonal`.\n * `backend::Symbol = :GGN::(_ in (:GGN, :EmpiricalFisher))`: the backend to use, either `:GGN` or `:EmpiricalFisher`.\n * `observational_noise (alias σ)::Float64 = 1.0`: the standard deviation of the prior distribution.\n * `prior_mean (alias μ₀)::Float64 = 0.0`: the mean of the prior distribution.\n * `prior_precision_matrix (alias P₀)::Union{AbstractMatrix,UniformScaling,Nothing} = nothing`: the covariance matrix of the prior distribution.\n * `fit_prior_nsteps::Int = 100::(_ > 0)`: the number of steps used to fit the priors.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `mean`: The mean of the posterior distribution.\n * `H`: The Hessian of the posterior distribution.\n * `P`: The precision matrix of the posterior distribution.\n * `cov_matrix`: The covariance matrix of the posterior distribution.\n * `n_data`: The number of data points.\n * `n_params`: The number of parameters.\n * `n_out`: The number of outputs.\n\n * `loss`: The loss value of the posterior distribution.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `loss_history`: an array containing the total loss per epoch.\n\n# Accessor functions\n\n * `training_losses(mach)`: return the loss history from report\n\n# Examples\n\n```\nusing MLJ\nusing Flux\nLaplaceRegressor = @load LaplaceRegressor pkg=LaplaceRedux\nmodel = Chain(\n Dense(4, 10, relu),\n Dense(10, 10, relu),\n Dense(10, 1)\n)\nmodel = LaplaceRegressor(model=model)\n\nX, y = make_regression(100, 4; noise=0.5, sparse=0.2, outliers=0.1)\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, 4; rng=123)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\ntraining_losses(mach) # loss history per epoch\nfitted_params(mach) # NamedTuple with the fitted params of Laplace\n\n```\n\nSee also [LaplaceRedux.jl](https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl" +":package_name" = "LaplaceRedux" +":name" = "LaplaceRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nBM25Transformer\n```\n\nA model type for constructing a b m25 transformer, based on [MLJText.jl](https://github.com/JuliaAI/MLJText.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBM25Transformer = @load BM25Transformer pkg=MLJText\n```\n\nDo `model = BM25Transformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BM25Transformer(max_doc_freq=...)`.\n\nThe transformer converts a collection of documents, tokenized or pre-parsed as bags of words/ngrams, to a matrix of [Okapi BM25 document-word statistics](https://en.wikipedia.org/wiki/Okapi_BM25). The BM25 scoring function uses both term frequency (TF) and inverse document frequency (IDF, defined below), as in [`TfidfTransformer`](@ref), but additionally adjusts for the probability that a user will consider a search result relevant based, on the terms in the search query and those in each document.\n\nIn textbooks and implementations there is variation in the definition of IDF. Here two IDF definitions are available. The default, smoothed option provides the IDF for a term `t` as `log((1 + n)/(1 + df(t))) + 1`, where `n` is the total number of documents and `df(t)` the number of documents in which `t` appears. 
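A quick arithmetic check of the smoothed definition, with made-up counts (the unsmoothed alternative is defined next):

```julia
# Smoothed IDF, as defined above: log((1 + n)/(1 + df(t))) + 1
n    = 4    # total number of documents (illustrative)
df_t = 2    # number of documents containing the term (illustrative)

idf_smooth = log((1 + n) / (1 + df_t)) + 1    # ≈ 1.51
```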
Setting `smooth_df = false` provides an IDF of `log(n/df(t)) + 1`.\n\nReferences:\n\n * http://ethen8181.github.io/machine-learning/search/bm25_intro.html\n * https://en.wikipedia.org/wiki/Okapi_BM25\n * https://nlp.stanford.edu/IR-book/html/htmledition/okapi-bm25-a-non-binary-model-1.html\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any vector whose elements are either tokenized documents or bags of words/ngrams. Specifically, each element is one of the following:\n\n * A vector of abstract strings (tokens), e.g., `[\"I\", \"like\", \"Sam\", \".\", \"Sam\", \"is\", \"nice\", \".\"]` (scitype `AbstractVector{Textual}`)\n * A dictionary of counts, indexed on abstract strings, e.g., `Dict(\"I\"=>1, \"Sam\"=>2, \"Sam is\"=>1)` (scitype `Multiset{Textual}}`)\n * A dictionary of counts, indexed on plain ngrams, e.g., `Dict((\"I\",)=>1, (\"Sam\",)=>2, (\"I\", \"Sam\")=>1)` (scitype `Multiset{<:NTuple{N,Textual} where N}`); here a *plain ngram* is a tuple of abstract strings.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `max_doc_freq=1.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `> max_doc_freq` documents will not be considered by the transformer. For example, if `max_doc_freq` is set to 0.9, terms that are in more than 90% of the documents will be removed.\n * `min_doc_freq=0.0`: Restricts the vocabulary that the transformer will consider. Terms that occur in `< max_doc_freq` documents will not be considered by the transformer. A value of 0.01 means that only terms that are at least in 1% of the documents will be included.\n * `κ=2`: The term frequency saturation characteristic. Higher values represent slower saturation. What we mean by saturation is the degree to which a term occurring extra times adds to the overall score.\n * `β=0.075`: Amplifies the particular document length compared to the average length. The bigger β is, the more document length is amplified in terms of the overall score. The default value is 0.75, and the bounds are restricted between 0 and 1.\n * `smooth_idf=true`: Control which definition of IDF to use (see above).\n\n# Operations\n\n * `transform(mach, Xnew)`: Based on the vocabulary, IDF, and mean word counts learned in training, return the matrix of BM25 scores for `Xnew`, a vector of the same form as `X` above. The matrix has size `(n, p)`, where `n = length(Xnew)` and `p` the size of the vocabulary. Tokens/ngrams not appearing in the learned vocabulary are scored zero.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vocab`: A vector containing the string used in the transformer's vocabulary.\n * `idf_vector`: The transformer's calculated IDF vector.\n * `mean_words_in_docs`: The mean number of words in each document.\n\n# Examples\n\n`BM25Transformer` accepts a variety of inputs. 
The example below transforms tokenized documents:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\nBM25Transformer = @load BM25Transformer pkg=MLJText\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\nbm25_transformer = BM25Transformer()\n\njulia> tokenized_docs = TextAnalysis.tokenize.(docs)\n2-element Vector{Vector{String}}:\n [\"Hi\", \"my\", \"name\", \"is\", \"Sam\", \".\"]\n [\"How\", \"are\", \"you\", \"today\", \"?\"]\n\nmach = machine(bm25_transformer, tokenized_docs)\nfit!(mach)\n\nfitted_params(mach)\n\ntfidf_mat = transform(mach, tokenized_docs)\n```\n\nAlternatively, one can provide documents pre-parsed as ngrams counts:\n\n```julia\nusing MLJ\nimport TextAnalysis\n\ndocs = [\"Hi my name is Sam.\", \"How are you today?\"]\ncorpus = TextAnalysis.Corpus(TextAnalysis.NGramDocument.(docs, 1, 2))\nngram_docs = TextAnalysis.ngrams.(corpus)\n\njulia> ngram_docs[1]\nDict{AbstractString, Int64} with 11 entries:\n \"is\" => 1\n \"my\" => 1\n \"name\" => 1\n \".\" => 1\n \"Hi\" => 1\n \"Sam\" => 1\n \"my name\" => 1\n \"Hi my\" => 1\n \"name is\" => 1\n \"Sam .\" => 1\n \"is Sam\" => 1\n\nbm25_transformer = BM25Transformer()\nmach = machine(bm25_transformer, ngram_docs)\nMLJ.fit!(mach)\nfitted_params(mach)\n\ntfidf_mat = transform(mach, ngram_docs)\n```\n\nSee also [`TfidfTransformer`](@ref), [`CountTransformer`](@ref)\n""" -":name" = "BM25Transformer" -":human_name" = "b m25 transformer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fitted_params"] -":hyperparameters" = "`(:max_doc_freq, :min_doc_freq, :κ, :β, :smooth_idf)`" -":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":getproperty", ":setproperty!", ":clean!", ":fit", ":fitted_params", ":is_same_except", ":predict", ":reformat", ":selectrows", ":training_losses", ":update"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractArray{ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`true`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[LightGBM.LGBMClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[SymbolicRegression.MultitargetSRRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, 
NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "Apache-2.0" +":prediction_type" = ":deterministic" +":load_path" = "SymbolicRegression.MLJInterfaceModule.MultitargetSRRegressor" +":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :dimensional_constraint_penalty, :alpha, :maxsize, :maxdepth, 
:turbo, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :populations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :population_size, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :print_precision, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_options, :val_recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :enable_autodiff, :nested_constraints, :deterministic, :define_helper_functions, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :runtests, :loss_type, :selection_method, :dimensions_type)`" +":is_pure_julia" = "`true`" +":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultitargetSRRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetSRRegressor(binary_operators=...)`.\n\nMulti-target Symbolic Regression regressor (`MultitargetSRRegressor`) conducts several searches for expressions that predict each target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any table of target variables whose element scitype is `Continuous`; check the scitype with `schema(y)`. Units in columns of `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights, which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. The same weights are used for all targets.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of lists of trained models.
The model chosen from each of these lists is determined by the `selection_method` keyword argument (a function), which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::Node{T}`, `dataset::Dataset{T}`, and `options::Options`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch.
For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to each variable. By default, this is 1.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. 
Default is \"BFGS\", but \"NelderMead\" is also supported.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `output_file`: What file to store equations to, as a backup.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `enable_autodiff`: Whether to enable automatic differentiation functionality. This is turned off by default. If turned on, this will be turned off if one of the operators does not have well-defined gradients.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. 
This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to Julia is used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and you are not passing `procs` manually, then processes will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select an expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data.
By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity. `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Vector{Int}}`: The complexity of each expression in each Pareto frontier.\n * `losses::Vector{Vector{L}}`: The loss of each expression in each Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{Vector{L}}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\nX = (a=rand(100), b=rand(100), c=rand(100))\nY = (y1=(@. cos(X.c) * 2.1 - 0.9), y2=(@. 
X.a * X.b + X.c))\nmodel = MultitargetSRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, Y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equations used:\nr = report(mach)\nfor (output_index, (eq, i)) in enumerate(zip(r.equation_strings, r.best_idx))\n println(\"Equation used for \", output_index, \": \", eq[i])\nend\n```\n\nSee also [`SRRegressor`](@ref).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +":package_name" = "SymbolicRegression" +":name" = "MultitargetSRRegressor" ":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "LightGBM" -":package_license" = "MIT Expat" -":load_path" = "LightGBM.MLJInterface.LGBMClassifier" -":package_uuid" = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" -":package_url" = "https://github.com/IQVIA-ML/LightGBM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLGBMClassifier\n```\n\nA model type for constructing a LightGBM classifier, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM\n```\n\nDo `model = LGBMClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMClassifier(objective=...)`.\n\n`LightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of classification tasks.\n\n# Training data In MLJ or MLJBase, bind an instance `model` to data with\n\nmach = machine(model, X, y) \n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * y is a vector of targets whose items are of scitype `Continuous`. 
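The `MultitargetSRRegressor` example that concludes above does not demonstrate the `(data=..., idx=...)` override described in its Operations section. A minimal continuation sketch, reusing `mach`, `X`, and `r` from that example (the index `2` is arbitrary):

```julia
# Evaluate a specific Pareto-frontier expression instead of the one
# picked by `selection_method`:
y_hat_alt = predict(mach, (data=X, idx=2))

r.best_idx    # the indices `selection_method` would pick by default
```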
Check the scitype with scitype(y).\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, contains a `LGBMClassification` object, a `CategoricalArray` of the input class names, and the classifier with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `namedtuple` containing:\n\n * `gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMClassifier = @load LGBMClassifier pkg=LightGBM \n\nX, y = @load_iris \nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMClassifier() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMClassification`](@ref)\n""" -":name" = "LGBMClassifier" -":human_name" = "LightGBM classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":predict", ":update"] -":hyperparameters" = "`(:objective, :boosting, :num_iterations, :learning_rate, :num_leaves, :tree_learner, :num_threads, :device_type, :seed, :deterministic, :force_col_wise, :force_row_wise, :histogram_pool_size, :max_depth, :min_data_in_leaf, :min_sum_hessian_in_leaf, :bagging_fraction, :pos_bagging_fraction, :neg_bagging_fraction, :bagging_freq, :bagging_seed, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :extra_trees, :extra_seed, :early_stopping_round, :first_metric_only, :max_delta_step, :lambda_l1, :lambda_l2, :linear_lambda, :min_gain_to_split, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :max_cat_to_onehot, :top_k, :monotone_constraints, :monotone_constraints_method, :monotone_penalty, :feature_contri, :forcedsplits_filename, :refit_decay_rate, :cegb_tradeoff, :cegb_penalty_split, :cegb_penalty_feature_lazy, :cegb_penalty_feature_coupled, :path_smooth, :interaction_constraints, :verbosity, :linear_tree, :max_bin, :max_bin_by_feature, :min_data_in_bin, :bin_construct_sample_cnt, :data_random_seed, :is_enable_sparse, :enable_bundle, :use_missing, :zero_as_missing, :feature_pre_filter, :pre_partition, :two_round, :header, :label_column, :weight_column, :ignore_column, :categorical_feature, :forcedbins_filename, :precise_float_parser, :start_iteration_predict, :num_iteration_predict, :predict_raw_score, :predict_leaf_index, :predict_contrib, :predict_disable_shape_check, :pred_early_stop, :pred_early_stop_freq, :pred_early_stop_margin, :is_unbalance, :scale_pos_weight, :sigmoid, :boost_from_average, :metric, :metric_freq, 
:is_provide_training_metric, :eval_at, :multi_error_top_k, :auc_mu_weights, :num_machines, :local_listen_port, :time_out, :machine_list_filename, :machines, :gpu_platform_id, :gpu_device_id, :gpu_use_dp, :num_gpu, :truncate_booster)`" -":hyperparameter_types" = "`(\"String\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Vector{Int64}\", \"String\", \"Float64\", \"Vector{Float64}\", \"String\", \"Float64\", \"Float64\", \"Float64\", \"Vector{Float64}\", \"Vector{Float64}\", \"Float64\", \"Vector{Vector{Int64}}\", \"Int64\", \"Bool\", \"Int64\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"String\", \"String\", \"String\", \"Vector{Int64}\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Float64\", \"Bool\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Vector{Float64}\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [] +":deep_properties" = "`()`" +":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[LightGBM.LGBMRegressor] -":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[SymbolicRegression.SRRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = 
"`MLJModelInterface.Deterministic`" +":package_license" = "Apache-2.0" +":prediction_type" = ":deterministic" +":load_path" = "SymbolicRegression.MLJInterfaceModule.SRRegressor" +":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :dimensional_constraint_penalty, :alpha, :maxsize, :maxdepth, :turbo, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :populations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :population_size, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :print_precision, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_options, :val_recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :enable_autodiff, :nested_constraints, :deterministic, :define_helper_functions, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :runtests, :loss_type, :selection_method, :dimensions_type)`" +":is_pure_julia" = "`true`" +":human_name" = "Symbolic Regression via Evolutionary Search" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(binary_operators=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. 
Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights, which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the `selection_method` keyword argument, a function which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::Node{T}`, `dataset::Dataset{T}`, and `options::Options`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. 
For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to each variable. By default, this is 1.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. 
Default is \"BFGS\", but \"NelderMead\" is also supported.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `output_file`: What file to store equations to, as a backup.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `enable_autodiff`: Whether to enable automatic differentiation functionality. This is turned off by default. If turned on, this will be turned off if one of the operators does not have well-defined gradients.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. 
This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia is used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`) and not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select the expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. 
By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. 
X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +":package_name" = "SymbolicRegression" +":name" = "SRRegressor" ":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "LightGBM" -":package_license" = "MIT Expat" -":load_path" = "LightGBM.MLJInterface.LGBMRegressor" -":package_uuid" = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" -":package_url" = "https://github.com/IQVIA-ML/LightGBM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLGBMRegressor\n```\n\nA model type for constructing a LightGBM regressor, based on [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM\n```\n\nDo `model = LGBMRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LGBMRegressor(objective=...)`.\n\nLightGBM, short for light gradient-boosting machine, is a framework for gradient boosting based on decision tree algorithms and used for classification, regression and other machine learning tasks, with a focus on performance and scalability. This model in particular is used for various types of regression tasks.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with \n\nmach = machine(model, X, y) \n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`; alternatively, `X` is any `AbstractMatrix` with `Continuous` elements; check the scitype with `scitype(X)`.\n * y is a vector of targets whose items are of scitype `Continuous`. 
Check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Hyper-parameters\n\nSee https://lightgbm.readthedocs.io/en/v3.3.5/Parameters.html.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: Fitted model information, contains a `LGBMRegression` object, an empty vector, and the regressor with all its parameters\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_metrics`: A dictionary containing all training metrics.\n * `importance`: A `namedtuple` containing:\n\n * `gain`: The total gain of each split used by the model\n * `split`: The number of times each feature is used by the model.\n\n# Examples\n\n```julia\n\nusing DataFrames\nusing MLJ\n\n# load the model\nLGBMRegressor = @load LGBMRegressor pkg=LightGBM \n\nX, y = @load_boston # a table and a vector \nX = DataFrame(X)\ntrain, test = partition(collect(eachindex(y)), 0.70, shuffle=true)\n\nfirst(X, 3)\nlgb = LGBMRegressor() # initialise a model with default params\nmach = machine(lgb, X[train, :], y[train]) |> fit!\n\npredict(mach, X[test, :])\n\n# access feature importances\nmodel_report = report(mach)\ngain_importance = model_report.importance.gain\nsplit_importance = model_report.importance.split\n```\n\nSee also [LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) and the unwrapped model type [`LightGBM.LGBMRegression`](@ref)\n""" -":name" = "LGBMRegressor" -":human_name" = "LightGBM regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":predict", ":update"] -":hyperparameters" = "`(:objective, :boosting, :num_iterations, :learning_rate, :num_leaves, :tree_learner, :num_threads, :device_type, :seed, :deterministic, :force_col_wise, :force_row_wise, :histogram_pool_size, :max_depth, :min_data_in_leaf, :min_sum_hessian_in_leaf, :bagging_fraction, :bagging_freq, :bagging_seed, :feature_fraction, :feature_fraction_bynode, :feature_fraction_seed, :extra_trees, :extra_seed, :early_stopping_round, :first_metric_only, :max_delta_step, :lambda_l1, :lambda_l2, :linear_lambda, :min_gain_to_split, :drop_rate, :max_drop, :skip_drop, :xgboost_dart_mode, :uniform_drop, :drop_seed, :top_rate, :other_rate, :min_data_per_group, :max_cat_threshold, :cat_l2, :cat_smooth, :max_cat_to_onehot, :top_k, :monotone_constraints, :monotone_constraints_method, :monotone_penalty, :feature_contri, :forcedsplits_filename, :refit_decay_rate, :cegb_tradeoff, :cegb_penalty_split, :cegb_penalty_feature_lazy, :cegb_penalty_feature_coupled, :path_smooth, :interaction_constraints, :verbosity, :linear_tree, :max_bin, :max_bin_by_feature, :min_data_in_bin, :bin_construct_sample_cnt, :data_random_seed, :is_enable_sparse, :enable_bundle, :use_missing, :zero_as_missing, :feature_pre_filter, :pre_partition, :two_round, :header, :label_column, :weight_column, :ignore_column, :categorical_feature, :forcedbins_filename, :precise_float_parser, :start_iteration_predict, :num_iteration_predict, :predict_raw_score, :predict_leaf_index, :predict_contrib, :predict_disable_shape_check, :is_unbalance, :boost_from_average, :reg_sqrt, :alpha, :fair_c, :poisson_max_delta_step, :tweedie_variance_power, :metric, :metric_freq, :is_provide_training_metric, :eval_at, :num_machines, :local_listen_port, :time_out, 
:machine_list_filename, :machines, :gpu_platform_id, :gpu_device_id, :gpu_use_dp, :num_gpu, :truncate_booster)`" -":hyperparameter_types" = "`(\"String\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Vector{Int64}\", \"String\", \"Float64\", \"Vector{Float64}\", \"String\", \"Float64\", \"Float64\", \"Float64\", \"Vector{Float64}\", \"Vector{Float64}\", \"Float64\", \"Vector{Vector{Int64}}\", \"Int64\", \"Bool\", \"Int64\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"String\", \"String\", \"String\", \"Vector{Int64}\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[LaplaceRedux.LaplaceClassifier] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}`" +[EvoTrees.EvoTreeClassifier] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", 
\"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "Apache" +":prediction_type" = ":probabilistic" +":load_path" = "EvoTrees.EvoTreeClassifier" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :tree_type, :rng, :device)`" +":is_pure_julia" = "`true`" +":human_name" = "evo tree classifier" +":is_supervised" = "`true`" +":iteration_parameter" = ":nrounds" +":docstring" = """EvoTreeClassifier(;kwargs...)\n\nA model type for constructing an EvoTreeClassifier, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeClassifier is used to perform multi-class classification, using cross-entropy loss.\n\n# Hyper-parameters\n\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting is stopped.\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree's raw predictions are scaled by `eta` prior to being added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=6`: Maximum depth of a tree. 
Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `tree_type=:binary` Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Trees are built depthwise until max depth is reached or if min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `:gpu`.\n\n# Internal API\n\nDo `config = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeClassifier(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, K]` where `K` is the number of classes:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\n```\n\nDo `model = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeClassifier(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Multiclass` or `<:OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: returns the mode of each of the predictions above.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(1:3, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\nmodel = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_iris\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mode(mach, X)\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/Evovest/EvoTrees.jl" +":package_name" = "EvoTrees" +":name" = "EvoTreeClassifier" ":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "LaplaceRedux" -":package_license" = "https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl/blob/main/LICENSE" -":load_path" = "LaplaceRedux.LaplaceClassifier" -":package_uuid" = "c52c1a26-f7c5-402b-80be-ba1e638ad478" -":package_url" = "https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLaplaceClassifier\n```\n\nA model type for constructing a laplace classifier, based on [LaplaceRedux.jl](https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLaplaceClassifier = @load LaplaceClassifier pkg=LaplaceRedux\n```\n\nDo `model = LaplaceClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LaplaceClassifier(model=...)`.\n\n`LaplaceClassifier` implements the [Laplace Redux – Effortless Bayesian Deep Learning](https://proceedings.neurips.cc/paper/2021/hash/a3923dbe2f702eff254d67b48ae2f06e-Abstract.html), originally published in Daxberger, E., Kristiadi, A., Immer, A., Eschenhagen, R., Bauer, M., Hennig, P. (2021): \"Laplace Redux – Effortless Bayesian Deep Learning.\", NIPS'21: Proceedings of the 35th International Conference on Neural Information Processing Systems*, Article No. 1537, pp. 
20089–20103 for classification models.\n\n# Training data\n\nIn MLJ or MLJBase, given a dataset X,y and a `Flux_Chain` adapted to the dataset, pass the chain to the model\n\n```julia\nlaplace_model = LaplaceClassifier(model = Flux_Chain,kwargs...)\n```\n\nthen bind an instance `laplace_model` to data with\n\n```\nmach = machine(laplace_model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters (format: name-type-default value-restrictions)\n\n * `model::Union{Flux.Chain,Nothing} = nothing`: Either nothing or a Flux model provided by the user and compatible with the dataset. In the former case, LaplaceRedux will use a standard MLP with 2 hidden layers with 20 neurons each.\n * `flux_loss = Flux.Losses.logitcrossentropy` : a Flux loss function\n * `optimiser = Adam()` a Flux optimiser\n * `epochs::Integer = 1000::(_ > 0)`: the number of training epochs.\n * `batch_size::Integer = 32::(_ > 0)`: the batch size.\n * `subset_of_weights::Symbol = :all::(_ in (:all, :last_layer, :subnetwork))`: the subset of weights to use, either `:all`, `:last_layer`, or `:subnetwork`.\n * `subnetwork_indices = nothing`: the indices of the subnetworks.\n * `hessian_structure::Union{HessianStructure,Symbol,String} = :full::(_ in (:full, :diagonal))`: the structure of the Hessian matrix, either `:full` or `:diagonal`.\n * `backend::Symbol = :GGN::(_ in (:GGN, :EmpiricalFisher))`: the backend to use, either `:GGN` or `:EmpiricalFisher`.\n * `observational_noise (alias σ)::Float64 = 1.0`: the standard deviation of the prior distribution.\n * `prior_mean (alias μ₀)::Float64 = 0.0`: the mean of the prior distribution.\n * `prior_precision_matrix (alias P₀)::Union{AbstractMatrix,UniformScaling,Nothing} = nothing`: the covariance matrix of the prior distribution.\n * `fit_prior_nsteps::Int = 100::(_ > 0)`: the number of steps used to fit the priors.\n * `link_approx::Symbol = :probit::(_ in (:probit, :plugin))`: the approximation to adopt to compute the probabilities.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `mean`: The mean of the posterior distribution.\n * `H`: The Hessian of the posterior distribution.\n * `P`: The precision matrix of the posterior distribution.\n * `cov_matrix`: The covariance matrix of the posterior distribution.\n * `n_data`: The number of data points.\n * `n_params`: The number of parameters.\n * `n_out`: The number of outputs.\n * `loss`: The loss value of the posterior distribution.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `loss_history`: an array containing the total loss per epoch.\n\n# Accessor functions\n\n * `training_losses(mach)`: return the loss history from report\n\n# Examples\n\n```\nusing MLJ\nLaplaceClassifier = @load LaplaceClassifier pkg=LaplaceRedux\n\nX, y = @load_iris\n\n# Define the Flux Chain model\nusing Flux\nmodel = Chain(\n Dense(4, 10, relu),\n Dense(10, 10, relu),\n Dense(10, 3)\n)\n\n#Define the LaplaceClassifier\nmodel = LaplaceClassifier(model=model)\n\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\ntraining_losses(mach) # loss history per epoch\npdf.(yhat, \"virginica\") # probabilities for the \"verginica\" class\nfitted_params(mach) # NamedTuple with the fitted params of Laplace\n\n```\n\nSee also [LaplaceRedux.jl](https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl).\n""" -":name" = "LaplaceClassifier" -":human_name" = "laplace classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":getproperty", ":setproperty!", ":clean!", ":fit", ":fitted_params", ":is_same_except", ":predict", ":reformat", ":selectrows", ":training_losses", ":update"] -":hyperparameters" = "`(:model, :flux_loss, :optimiser, :epochs, :batch_size, :subset_of_weights, :subnetwork_indices, :hessian_structure, :backend, :observational_noise, :prior_mean, :prior_precision_matrix, :fit_prior_nsteps, :link_approx)`" -":hyperparameter_types" = "`(\"Union{Nothing, Flux.Chain}\", \"Any\", \"Any\", \"Integer\", \"Integer\", \"Symbol\", \"Any\", \"Union{String, Symbol, LaplaceRedux.HessianStructure}\", \"Symbol\", \"Float64\", \"Float64\", \"Union{Nothing, LinearAlgebra.UniformScaling, AbstractMatrix}\", \"Int64\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`true`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":show", ":fit", ":predict", ":update"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`true`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, 
AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[LaplaceRedux.LaplaceRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}`" +[EvoTrees.EvoTreeGaussian] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractArray{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractArray{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "Apache" +":prediction_type" = ":probabilistic" +":load_path" = "EvoTrees.EvoTreeGaussian" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":is_pure_julia" = "`true`" +":human_name" = "evo tree gaussian" +":is_supervised" = "`true`" +":iteration_parameter" = ":nrounds" +":docstring" = """EvoTreeGaussian(;kwargs...)\n\nA model type for constructing an EvoTreeGaussian, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting μ and σ parameters to maximize likelihood.\n\n# Hyper-parameters\n\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting is stopped.\n * `nrounds=100`: Number of rounds. 
It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree's raw predictions are scaled by `eta` prior to being added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=8.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for Gaussian regression, constraints may not be enforced systematically.\n * `tree_type=:binary` Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Trees are built depthwise until max depth is reached or if min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `:gpu`.\n\n# Internal API\n\nDo `config = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeGaussian(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 2]` where the second dimension refers to `μ` and `σ` respectively:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\n```\n\nDo `model = EvoTreeGaussian()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeGaussian(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian distributions given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nparams = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(params; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\nmodel = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/Evovest/EvoTrees.jl" +":package_name" = "EvoTrees" +":name" = "EvoTreeGaussian" ":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "LaplaceRedux" -":package_license" = "https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl/blob/main/LICENSE" -":load_path" = "LaplaceRedux.LaplaceRegressor" -":package_uuid" = "c52c1a26-f7c5-402b-80be-ba1e638ad478" -":package_url" = "https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLaplaceRegressor\n```\n\nA model type for constructing a laplace regressor, based on [LaplaceRedux.jl](https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLaplaceRegressor = @load LaplaceRegressor pkg=LaplaceRedux\n```\n\nDo `model = LaplaceRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LaplaceRegressor(model=...)`.\n\n`LaplaceRegressor` implements the [Laplace Redux – Effortless Bayesian Deep Learning](https://proceedings.neurips.cc/paper/2021/hash/a3923dbe2f702eff254d67b48ae2f06e-Abstract.html), originally published in Daxberger, E., Kristiadi, A., Immer, A., Eschenhagen, R., Bauer, M., Hennig, P. (2021): \"Laplace Redux – Effortless Bayesian Deep Learning.\", NIPS'21: Proceedings of the 35th International Conference on Neural Information Processing Systems*, Article No. 1537, pp. 
20089–20103 for regression models.\n\n# Training data\n\nIn MLJ or MLJBase, given a dataset X,y and a `Flux_Chain` adapted to the dataset, pass the chain to the model\n\n```julia\nlaplace_model = LaplaceRegressor(model = Flux_Chain,kwargs...)\n```\n\nthen bind an instance `laplace_model` to data with\n\n```\nmach = machine(laplace_model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyperparameters (format: name-type-default value-restrictions)\n\n * `model::Union{Flux.Chain,Nothing} = nothing`: Either nothing or a Flux model provided by the user and compatible with the dataset. In the former case, LaplaceRedux will use a standard MLP with 2 hidden layers with 20 neurons each.\n * `flux_loss = Flux.Losses.logitcrossentropy` : a Flux loss function\n * `optimiser = Adam()` a Flux optimiser\n * `epochs::Integer = 1000::(_ > 0)`: the number of training epochs.\n * `batch_size::Integer = 32::(_ > 0)`: the batch size.\n * `subset_of_weights::Symbol = :all::(_ in (:all, :last_layer, :subnetwork))`: the subset of weights to use, either `:all`, `:last_layer`, or `:subnetwork`.\n * `subnetwork_indices = nothing`: the indices of the subnetworks.\n * `hessian_structure::Union{HessianStructure,Symbol,String} = :full::(_ in (:full, :diagonal))`: the structure of the Hessian matrix, either `:full` or `:diagonal`.\n * `backend::Symbol = :GGN::(_ in (:GGN, :EmpiricalFisher))`: the backend to use, either `:GGN` or `:EmpiricalFisher`.\n * `observational_noise (alias σ)::Float64 = 1.0`: the standard deviation of the prior distribution.\n * `prior_mean (alias μ₀)::Float64 = 0.0`: the mean of the prior distribution.\n * `prior_precision_matrix (alias P₀)::Union{AbstractMatrix,UniformScaling,Nothing} = nothing`: the covariance matrix of the prior distribution.\n * `fit_prior_nsteps::Int = 100::(_ > 0)`: the number of steps used to fit the priors.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic, but uncalibrated.\n * `predict_mode(mach, Xnew)`: instead return the mode of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `mean`: The mean of the posterior distribution.\n * `H`: The Hessian of the posterior distribution.\n * `P`: The precision matrix of the posterior distribution.\n * `cov_matrix`: The covariance matrix of the posterior distribution.\n * `n_data`: The number of data points.\n * `n_params`: The number of parameters.\n * `n_out`: The number of outputs.\n\n * `loss`: The loss value of the posterior distribution.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `loss_history`: an array containing the total loss per epoch.\n\n# Accessor functions\n\n * `training_losses(mach)`: return the loss history from report\n\n# Examples\n\n```\nusing MLJ\nusing Flux\nLaplaceRegressor = @load LaplaceRegressor pkg=LaplaceRedux\nmodel = Chain(\n Dense(4, 10, relu),\n Dense(10, 10, relu),\n Dense(10, 1)\n)\nmodel = LaplaceRegressor(model=model)\n\nX, y = make_regression(100, 4; noise=0.5, sparse=0.2, outliers=0.1)\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, 4; rng=123)\nyhat = predict(mach, Xnew) # probabilistic predictions\npredict_mode(mach, Xnew) # point predictions\ntraining_losses(mach) # loss history per epoch\nfitted_params(mach) # NamedTuple with the fitted params of Laplace\n\n```\n\nSee also [LaplaceRedux.jl](https://github.com/JuliaTrustworthyAI/LaplaceRedux.jl).\n""" -":name" = "LaplaceRegressor" -":human_name" = "laplace regressor" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":getproperty", ":setproperty!", ":clean!", ":fit", ":fitted_params", ":is_same_except", ":predict", ":reformat", ":selectrows", ":training_losses", ":update"] -":hyperparameters" = "`(:model, :flux_loss, :optimiser, :epochs, :batch_size, :subset_of_weights, :subnetwork_indices, :hessian_structure, :backend, :observational_noise, :prior_mean, :prior_precision_matrix, :fit_prior_nsteps)`" -":hyperparameter_types" = "`(\"Union{Nothing, Flux.Chain}\", \"Any\", \"Any\", \"Integer\", \"Integer\", \"Symbol\", \"Any\", \"Union{String, Symbol, LaplaceRedux.HessianStructure}\", \"Symbol\", \"Float64\", \"Float64\", \"Union{Nothing, LinearAlgebra.UniformScaling, AbstractMatrix}\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`true`" -":reports_feature_importances" = "`false`" +":implemented_methods" = [":show", ":fit", ":predict", ":update"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`true`" +":reports_feature_importances" = "`true`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[XGBoost.XGBoostCount] 
-":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[EvoTrees.EvoTreeMLE] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Count}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Count}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Count}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "Apache" +":prediction_type" = ":probabilistic" +":load_path" = "EvoTrees.EvoTreeMLE" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":is_pure_julia" = "`true`" +":human_name" = "evo tree mle" +":is_supervised" = "`true`" +":iteration_parameter" = ":nrounds" +":docstring" = """EvoTreeMLE(;kwargs...)\n\nA model type for constructing a EvoTreeMLE, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeMLE performs maximum likelihood estimation. Assumed distribution is specified through `loss` kwargs. Both Gaussian and Logistic distributions are supported.\n\n# Hyper-parameters\n\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting in stopped.\n\n`loss=:gaussian`: Loss to be be minimized during training. One of:\n\n * `:gaussian_mle`\n * `:logistic_mle`\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. 
Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=8.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for MLE regression, constraints may not be enforced systematically.\n * `tree_type=:binary`: Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Trees are built depthwise until max depth is reached or until min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `:gpu`. The following losses are not GPU-supported at the moment: `:logistic_mle`.\n\n# Internal API\n\nDo `config = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(max_depth=...)`.\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, nparams]` where the second dimension refers to `μ` & `σ` for Normal/Gaussian and `μ` & `s` for Logistic.\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\n```\n\nDo `model = EvoTreeMLE()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian or Logistic distributions (according to the provided `loss`) given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\nmodel = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/Evovest/EvoTrees.jl" +":package_name" = "EvoTrees" +":name" = "EvoTreeMLE" ":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "XGBoost" -":package_license" = "unknown" -":load_path" = "MLJXGBoostInterface.XGBoostCount" -":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" -":package_url" = "https://github.com/dmlc/XGBoost.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nXGBoostCount\n```\n\nA model type for constructing a eXtreme Gradient Boosting Count Regressor, based on [XGBoost.jl](https://github.com/dmlc/XGBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nXGBoostCount = @load XGBoostCount pkg=XGBoost\n```\n\nDo `model = XGBoostCount()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `XGBoostCount(test=...)`.\n\nUnivariate discrete regression using [xgboost](https://xgboost.readthedocs.io/en/stable/index.html).\n\n# Training data\n\nIn `MLJ` or `MLJBase`, bind an instance `model` to data with\n\n```julia\nm = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features, either an `AbstractMatrix` or Tables.jl-compatible table.\n * `y`: is an `AbstractVector` continuous target.\n\nTrain using `fit!(m, rows=...)`.\n\n# Hyper-parameters\n\nSee https://xgboost.readthedocs.io/en/stable/parameter.html.\n""" -":name" = "XGBoostCount" -":human_name" = "eXtreme Gradient Boosting Count Regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!"] -":hyperparameters" = "`(:test, :num_round, :booster, :disable_default_eval_metric, :eta, :num_parallel_tree, :gamma, :max_depth, :min_child_weight, :max_delta_step, :subsample, :colsample_bytree, :colsample_bylevel, :colsample_bynode, :lambda, :alpha, :tree_method, :sketch_eps, :scale_pos_weight, :updater, :refresh_leaf, :process_type, :grow_policy, :max_leaves, :max_bin, :predictor, :sample_type, :normalize_type, :rate_drop, :one_drop, :skip_drop, :feature_selector, :top_k, :tweedie_variance_power, :objective, :base_score, :early_stopping_rounds, :watchlist, :nthread, :importance_type, :seed, :validate_parameters, :eval_metric)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":show", ":fit", ":predict", ":update"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[XGBoost.XGBoostRegressor] -":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[EvoTrees.EvoTreeRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "Apache" +":prediction_type" = ":deterministic" +":load_path" = "EvoTrees.EvoTreeRegressor" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":is_pure_julia" = "`true`" +":human_name" = "evo tree regressor" +":is_supervised" = "`true`" +":iteration_parameter" = ":nrounds" +":docstring" = """EvoTreeRegressor(;kwargs...)\n\nA model type for constructing a EvoTreeRegressor, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface.\n\n# Hyper-parameters\n\n * `loss=:mse`: Loss to be be minimized during training. One of:\n\n * `:mse`\n * `:mae`\n * `:logloss`\n * `:gamma`\n * `:tweedie`\n * `:quantile`\n * `:cred_var`: **experimental** credibility-based gains, derived from ratio of spread to process variance.\n * `:cred_std`: **experimental** credibility-based gains, derived from ratio of spread to process std deviation.\n * `metric`: The evaluation metric used to track evaluation data and serves as a basis for early stopping. Supported metrics are: \n\n * `:mse`: Mean-squared error. 
Adapted for general regression models.\n * `:rmse`: Root-mean-squared error. Adapted for general regression models.\n * `:mae`: Mean absolute error. Adapted for general regression models.\n * `:logloss`: Adapted for `:logistic` regression models.\n * `:poisson`: Poisson deviance. Adapted to `EvoTreeCount` count models.\n * `:gamma`: Gamma deviance. Adapted to regression problems on Gamma-like, positively distributed targets.\n * `:tweedie`: Tweedie deviance. Adapted to regression problems on Tweedie-like, positively distributed targets with probability mass at `y == 0`.\n * `:quantile`: Corresponds to an asymmetric absolute error, where residuals are penalized by `alpha` or `1 - alpha` according to their sign.\n * `:gini`: The normalized Gini coefficient between predictions and targets.\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting is stopped.\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree's raw predictions are scaled by `eta` prior to being added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `alpha::T=0.5`: Loss-specific parameter in the `[0, 1]` range; for `:quantile`, the target quantile for the regression.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). Only `:linear`, `:logistic`, `:gamma` and `:tweedie` losses are supported at the moment.\n * `tree_type=:binary`: Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Trees are built depthwise until max depth is reached or until min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. 
Can be either `:cpu` or `:gpu`.\n\n# Internal API\n\nDo `config = EvoTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeRegressor(loss=...)`.\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Vector` of length `nobs`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\n```\n\nDo `model = EvoTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeRegressor(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\nmodel = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\n```\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/Evovest/EvoTrees.jl" +":package_name" = "EvoTrees" +":name" = "EvoTreeRegressor" ":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "XGBoost" -":package_license" = "unknown" -":load_path" = "MLJXGBoostInterface.XGBoostRegressor" -":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" -":package_url" = "https://github.com/dmlc/XGBoost.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nXGBoostRegressor\n```\n\nA model type for constructing a eXtreme Gradient Boosting Regressor, based on [XGBoost.jl](https://github.com/dmlc/XGBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nXGBoostRegressor = @load XGBoostRegressor pkg=XGBoost\n```\n\nDo `model = XGBoostRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `XGBoostRegressor(test=...)`.\n\nUnivariate continuous regression using [xgboost](https://xgboost.readthedocs.io/en/stable/index.html).\n\n# Training data\n\nIn `MLJ` or `MLJBase`, bind an instance `model` to data with\n\n```julia\nm = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features whose columns have `Continuous` element scitype; check column scitypes with `schema(X)`.\n * `y`: is an `AbstractVector` target with `Continuous` elements; check the scitype with `scitype(y)`.\n\nTrain using `fit!(m, rows=...)`.\n\n# Hyper-parameters\n\nSee https://xgboost.readthedocs.io/en/stable/parameter.html.\n""" -":name" = "XGBoostRegressor" -":human_name" = "eXtreme Gradient Boosting Regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!"] -":hyperparameters" = "`(:test, :num_round, :booster, :disable_default_eval_metric, :eta, :num_parallel_tree, :gamma, :max_depth, :min_child_weight, :max_delta_step, :subsample, :colsample_bytree, :colsample_bylevel, :colsample_bynode, :lambda, :alpha, :tree_method, :sketch_eps, :scale_pos_weight, :updater, :refresh_leaf, :process_type, :grow_policy, :max_leaves, :max_bin, :predictor, :sample_type, :normalize_type, :rate_drop, :one_drop, :skip_drop, :feature_selector, :top_k, :tweedie_variance_power, :objective, :base_score, :early_stopping_rounds, :watchlist, :nthread, :importance_type, :seed, :validate_parameters, :eval_metric)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":show", ":fit", ":predict", ":update"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[XGBoost.XGBoostClassifier] -":input_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[EvoTrees.EvoTreeCount] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Count}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "Apache" +":prediction_type" = ":probabilistic" +":load_path" = "EvoTrees.EvoTreeCount" +":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":is_pure_julia" = "`true`" +":human_name" = "evo tree count" +":is_supervised" = "`true`" +":iteration_parameter" = ":nrounds" +":docstring" = """EvoTreeCount(;kwargs...)\n\nA model type for constructing a EvoTreeCount, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeCount is used to perform Poisson probabilistic regression on count target.\n\n# Hyper-parameters\n\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting in stopped.\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. 
A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).\n * `tree_type=:binary`: Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Trees are built depthwise until max depth is reached or until min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `:gpu`.\n\n# Internal API\n\nDo `config = EvoTreeCount()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeCount(max_depth=...)`.\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Vector` of length `nobs`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\n```\n\nDo `model = EvoTreeCount()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeCount(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Count`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Poisson distributions given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(0:2, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\nusing MLJ\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\nmodel = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nX, y = randn(nobs, nfeats), rand(0:2, nobs)\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/Evovest/EvoTrees.jl" +":package_name" = "EvoTrees" +":name" = "EvoTreeCount" ":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "XGBoost" -":package_license" = "unknown" -":load_path" = "MLJXGBoostInterface.XGBoostClassifier" -":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" -":package_url" = "https://github.com/dmlc/XGBoost.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nXGBoostClassifier\n```\n\nA model type for constructing a eXtreme Gradient Boosting Classifier, based on [XGBoost.jl](https://github.com/dmlc/XGBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nXGBoostClassifier = @load XGBoostClassifier pkg=XGBoost\n```\n\nDo `model = XGBoostClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `XGBoostClassifier(test=...)`.\n\nUnivariate classification using [xgboost](https://xgboost.readthedocs.io/en/stable/index.html).\n\n# Training data\n\nIn `MLJ` or `MLJBase`, bind an instance `model` to data with\n\n```julia\nm = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features, either an `AbstractMatrix` or Tables.jl-compatible table.\n * `y`: is an `AbstractVector` `Finite` target.\n\nTrain using `fit!(m, rows=...)`.\n\n# Hyper-parameters\n\nSee https://xgboost.readthedocs.io/en/stable/parameter.html.\n""" -":name" = "XGBoostClassifier" -":human_name" = "eXtreme Gradient Boosting Classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":predict"] -":hyperparameters" = "`(:test, :num_round, :booster, :disable_default_eval_metric, :eta, :num_parallel_tree, :gamma, :max_depth, :min_child_weight, :max_delta_step, :subsample, :colsample_bytree, :colsample_bylevel, :colsample_bynode, :lambda, :alpha, :tree_method, :sketch_eps, :scale_pos_weight, :updater, :refresh_leaf, :process_type, :grow_policy, :max_leaves, :max_bin, :predictor, :sample_type, :normalize_type, :rate_drop, :one_drop, :skip_drop, :feature_selector, :top_k, :tweedie_variance_power, :objective, :base_score, :early_stopping_rounds, :watchlist, :nthread, :importance_type, :seed, :validate_parameters, :eval_metric)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":show", ":fit", ":predict", ":update"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Count}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Count}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" +[MLJTestInterface] -[EvoTrees.EvoTreeClassifier] -":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +[MLJModels.ConstantClassifier] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`()`" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`()`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJModels.ConstantClassifier" +":hyperparameters" = "`()`" +":is_pure_julia" = "`true`" +":human_name" = "constant classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nConstantClassifier\n```\n\nThis \"dummy\" probabilistic predictor always returns the same distribution, irrespective of the provided input pattern. The distribution `d` returned is the `UnivariateFinite` distribution based on frequency of classes observed in the training target data. So, `pdf(d, level)` is the number of times the training target takes on the value `level`. Use `predict_mode` instead of `predict` to obtain the training target mode instead. 
For more on the `UnivariateFinite` type, see the CategoricalDistributions.jl package.\n\nAlmost any reasonable model is expected to outperform `ConstantClassifier`, which is used almost exclusively for testing and establishing performance baselines.\n\nIn MLJ (or MLJModels) do `model = ConstantClassifier()` to construct an instance.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`)\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nNone.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` (which for this model are ignored). Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the mode of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `target_distribution`: The distribution fit to the supplied target data.\n\n# Examples\n\n```julia\nusing MLJ\n\nclf = ConstantClassifier()\n\nX, y = @load_crabs # a table and a categorical vector\nmach = machine(clf, X, y) |> fit!\n\nfitted_params(mach)\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\n# probabilistic predictions:\nyhat = predict(mach, Xnew)\nyhat[1]\n\n# raw probabilities:\npdf.(yhat, \"B\")\n\n# probability matrix:\nL = levels(y)\npdf(yhat, L)\n\n# point predictions:\npredict_mode(mach, Xnew)\n```\n\nSee also [`ConstantRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +":package_name" = "MLJModels" +":name" = "ConstantClassifier" ":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "EvoTrees" -":package_license" = "Apache" -":load_path" = "EvoTrees.EvoTreeClassifier" -":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" -":package_url" = "https://github.com/Evovest/EvoTrees.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """EvoTreeClassifier(;kwargs...)\n\nA model type for constructing a EvoTreeClassifier, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeClassifier is used to perform multi-class classification, using cross-entropy loss.\n\n# Hyper-parameters\n\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting in stopped.\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. 
Must be >= 0.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `tree_type=:binary` Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `:gpu`.\n\n# Internal API\n\nDo `config = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeClassifier(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, K]` where `K` is the number of classes:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\n```\n\nDo `model = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeClassifier(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Multiclas` or `<:OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: returns the mode of each of the prediction above.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(1:3, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\nmodel = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_iris\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mode(mach, X)\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n""" -":name" = "EvoTreeClassifier" -":human_name" = "evo tree classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":nrounds" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" +":implemented_methods" = [":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`true`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[EvoTrees.EvoTreeGaussian] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, 
Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "EvoTrees" -":package_license" = "Apache" -":load_path" = "EvoTrees.EvoTreeGaussian" -":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" -":package_url" = "https://github.com/Evovest/EvoTrees.jl" +[MLJModels.Standardizer] ":is_wrapper" = "`false`" -":supports_weights" = "`true`" +":hyperparameter_types" = "`(\"Union{Function, AbstractVector{Symbol}}\", \"Bool\", \"Bool\", \"Bool\")`" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJModels.Standardizer" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :count)`" +":is_pure_julia" = "`true`" +":human_name" = "standardizer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStandardizer = @load Standardizer pkg=MLJModels\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). 
Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names │ scitypes │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous │\n│ ordinal4 │ Continuous │\n│ nominal │ Multiclass{3} │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table, 
AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +":package_name" = "MLJModels" +":name" = "Standardizer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """EvoTreeGaussian(;kwargs...)\n\nA model type for constructing a EvoTreeGaussian, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting μ and σ parameters to maximize likelihood.\n\n# Hyper-parameters\n\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting in stopped.\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=8.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for Gaussian regression, constraints may not be enforce systematically.\n * `tree_type=:binary` Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `gpu`.\n\n# Internal API\n\nDo `config = EvoTreeGaussian()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeGaussian(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 2]` where the second dimensions refer to `μ` and `σ` respectively:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\n```\n\nDo `model = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeGaussian(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian distributions given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nparams = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(params; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\nmodel = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n""" -":name" = "EvoTreeGaussian" -":human_name" = "evo tree gaussian" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":nrounds" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[EvoTrees.EvoTreeMLE] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "EvoTrees" -":package_license" = "Apache" -":load_path" = "EvoTrees.EvoTreeMLE" -":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" -":package_url" = "https://github.com/Evovest/EvoTrees.jl" +[MLJModels.DeterministicConstantClassifier] ":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """EvoTreeMLE(;kwargs...)\n\nA model type for constructing a EvoTreeMLE, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeMLE performs maximum likelihood estimation. Assumed distribution is specified through `loss` kwargs. Both Gaussian and Logistic distributions are supported.\n\n# Hyper-parameters\n\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting in stopped.\n\n`loss=:gaussian`: Loss to be be minimized during training. One of:\n\n * `:gaussian_mle`\n * `:logistic_mle`\n * `nrounds=100`: Number of rounds. 
It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0.\n\nA lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance. \n\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=8.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for MLE regression, constraints may not be enforced systematically.\n * `tree_type=:binary` Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `gpu`. Following losses are not GPU supported at the moment: `:logistic_mle`.\n\n# Internal API\n\nDo `config = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeMLE(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, nparams]` where the second dimensions refer to `μ` & `σ` for Normal/Gaussian and `μ` & `s` for Logistic.\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\n```\n\nDo `model = EvoTreeMLE()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian or Logistic distributions (according to provided `loss`) given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\nmodel = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n""" -":name" = "EvoTreeMLE" -":human_name" = "evo tree mle" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":nrounds" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" -":deep_properties" = "`()`" +":hyperparameter_types" = "`()`" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`()`" ":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[EvoTrees.EvoTreeRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" 
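The scitype traits being rewritten in this hunk (`:input_scitype`, `:target_scitype`, `:fit_data_scitype`) are not just documentation: MLJ's model-query machinery consumes them. A minimal sketch of such a query, using synthetic data (the printed list depends on which interface packages are registered):

```julia
using MLJ

# synthetic table with Continuous features and a Continuous target:
X = (x1 = rand(100), x2 = rand(100))
y = rand(100)

# `matching(X, y)` filters the registry on the scitype traits stored in
# Metadata.toml, such as `:input_scitype` and `:target_scitype`:
for m in models(matching(X, y))
    println(m.name, " (", m.package_name, ")")
end
```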
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJModels.DeterministicConstantClassifier" +":hyperparameters" = "`()`" ":is_pure_julia" = "`true`" -":package_name" = "EvoTrees" -":package_license" = "Apache" -":load_path" = "EvoTrees.EvoTreeRegressor" -":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" -":package_url" = "https://github.com/Evovest/EvoTrees.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """EvoTreeRegressor(;kwargs...)\n\nA model type for constructing a EvoTreeRegressor, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface.\n\n# Hyper-parameters\n\n * `loss=:mse`: Loss to be be minimized during training. One of:\n\n * `:mse`\n * `:mae`\n * `:logloss`\n * `:gamma`\n * `:tweedie`\n * `:quantile`\n * `:cred_var`: **experimental** credibility-based gains, derived from ratio of spread to process variance.\n * `:cred_std`: **experimental** credibility-based gains, derived from ratio of spread to process std deviation.\n * `metric`: The evaluation metric used to track evaluation data and serves as a basis for early stopping. Supported metrics are: \n\n * `:mse`: Mean-squared error. Adapted for general regression models.\n * `:rmse`: Root-mean-squared error. Adapted for general regression models.\n * `:mae`: Mean absolute error. Adapted for general regression models.\n * `:logloss`: Adapted for `:logistic` regression models.\n * `:poisson`: Poisson deviance. Adapted to `EvoTreeCount` count models.\n * `:gamma`: Gamma deviance. Adapted to regression problem on Gamma like, positively distributed targets.\n * `:tweedie`: Tweedie deviance. 
Adapted to regression problem on Tweedie like, positively distributed targets with probability mass at `y == 0`.\n * `:quantile`: The corresponds to an assymetric absolute error, where residuals are penalized according to alpha / (1-alpha) according to their sign.\n * `:gini`: The normalized Gini between pred and target\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting in stopped.\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `alpha::T=0.5`: Loss specific parameter in the [0, 1] range: - `:quantile`: target quantile for the regression.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). Only `:linear`, `:logistic`, `:gamma` and `tweedie` losses are supported at the moment.\n * `tree_type=:binary` Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `gpu`.\n\n# Internal API\n\nDo `config = EvoTreeRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeRegressor(loss=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Vector` of length `nobs`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\n```\n\nDo `model = EvoTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\nmodel = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\n```\n""" -":name" = "EvoTreeRegressor" -":human_name" = "evo tree regressor" +":human_name" = "deterministic constant classifier" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":nrounds" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" 
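The `reports_feature_importances` trait recorded above advertises that fitted EvoTrees machines expose importances through MLJ's generic accessor. A minimal sketch, assuming EvoTrees is installed; the data below is synthetic:

```julia
using MLJ
EvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees

# synthetic regression data in which x1 carries the signal:
X = (x1 = rand(200), x2 = rand(200))
y = 2 .* X.x1 .+ 0.1 .* randn(200)

mach = machine(EvoTreeRegressor(nrounds=50), X, y) |> fit!

# available precisely because the model's `reports_feature_importances`
# trait is `true`:
feature_importances(mach)
```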
-":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[EvoTrees.EvoTreeCount] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Count}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Count}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Count}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nDeterministicConstantClassifier\n```\n\nA model type for constructing a deterministic constant classifier, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantClassifier = @load DeterministicConstantClassifier pkg=MLJModels\n```\n\nDo `model = DeterministicConstantClassifier()` to construct an instance with default hyper-parameters. """ ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +":package_name" = "MLJModels" +":name" = "DeterministicConstantClassifier" ":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "EvoTrees" -":package_license" = "Apache" -":load_path" = "EvoTrees.EvoTreeCount" -":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" -":package_url" = "https://github.com/Evovest/EvoTrees.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """EvoTreeCount(;kwargs...)\n\nA model type for constructing a EvoTreeCount, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeCount is used to perform Poisson probabilistic regression on count target.\n\n# Hyper-parameters\n\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting in stopped.\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. 
Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).\n * `tree_type=:binary` Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `:gpu`.\n\n# Internal API\n\nDo `config = EvoTreeCount()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeCount(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Vector` of length `nobs`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\n```\n\nDo `model = EvoTreeCount()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeCount(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X, y) where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Count`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: returns a vector of Poisson distributions given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(0:2, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\nusing MLJ\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\nmodel = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nX, y = randn(nobs, nfeats), rand(0:2, nobs)\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n""" -":name" = "EvoTreeCount" -":human_name" = "evo tree count" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] -":hyperparameters" = "`(:loss, :metric, :nrounds, :bagging_size, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" -":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":nrounds" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`true`" +":implemented_methods" = [":fit", ":predict"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" 
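The `DeterministicConstantClassifier` entry above carries no usage example, but the workflow for a model this small is short. A minimal sketch with made-up data; as the name suggests, the fitted machine is expected to predict the single most frequent class seen in training:

```julia
using MLJ
DeterministicConstantClassifier = @load DeterministicConstantClassifier pkg=MLJModels

X = (x = rand(6),)
y = coerce(["a", "b", "a", "a", "b", "a"], Multiclass)

mach = machine(DeterministicConstantClassifier(), X, y) |> fit!
predict(mach, X)  # expected: the modal class "a", repeated for each row
```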
-[SymbolicRegression.SRTestRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "SymbolicRegression" -":package_license" = "Apache-2.0" -":load_path" = "SymbolicRegression.MLJInterfaceModule.SRTestRegressor" -":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" -":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +[MLJModels.UnivariateTimeTypeToContinuous] ":is_wrapper" = "`false`" -":supports_weights" = "`true`" +":hyperparameter_types" = "`(\"Union{Nothing, Dates.TimeType}\", \"Dates.Period\")`" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJModels.UnivariateTimeTypeToContinuous" +":hyperparameters" = "`(:zero_time, :step)`" +":is_pure_julia" = "`true`" +":human_name" = "single variable transformer that creates continuous representations of temporally typed data" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=MLJModels\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142857142854\n```\n"""
+":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`"
+":package_url" = "https://github.com/JuliaAI/MLJModels.jl"
+":package_name" = "MLJModels"
+":name" = "UnivariateTimeTypeToContinuous"
+":target_in_fit" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = """```\nSRTestRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nSRTestRegressor = @load SRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRTestRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`SRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `operator_enum_constructor = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `loss_scale = log`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}\n alpha: Float64 1.0\n scaled: Bool false\n, LineSearches.BackTracking{Float64, Int64}\n c_1: Float64 0.0001\n ρ_hi: Float64 0.5\n ρ_lo: Float64 0.1\n iterations: Int64 1000\n order: Int64 3\n maxstep: Float64 Inf\n cache: Nothing nothing\n, nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- `optimizer_options = nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.TTY(RawFD(11) paused, 0 bytes waiting)`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = 
DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}`\n\n""" -":name" = "SRTestRegressor" -":human_name" = "Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", 
\"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":constructor" = "`nothing`" -[SymbolicRegression.MultitargetSRTestRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "SymbolicRegression" -":package_license" = "Apache-2.0" -":load_path" = "SymbolicRegression.MLJInterfaceModule.MultitargetSRTestRegressor" -":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" -":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +[MLJModels.OneHotEncoder] ":is_wrapper" = "`false`" -":supports_weights" = "`true`" +":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Bool\", \"Bool\", \"Bool\")`" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJModels.OneHotEncoder" +":hyperparameters" = "`(:features, :drop_last, :ordered_factor, :ignore)`" +":is_pure_julia" = "`true`" +":human_name" = "one-hot encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneHotEncoder = @load OneHotEncoder pkg=MLJModels\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (column names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). 
This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=true`: whether to drop the column corresponding to the final class of encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade__A │ Continuous │\n│ grade__B │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Count │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +":package_name" = "MLJModels" +":name" = "OneHotEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMultitargetSRTestRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on\n[SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nMultitargetSRTestRegressor = @load MultitargetSRTestRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRTestRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in\n`MultitargetSRTestRegressor(defaults=...)`.\n# Hyper-parameters\n\n- `defaults = nothing`\n\n- `binary_operators = nothing`\n\n- `unary_operators = nothing`\n\n- `maxsize = nothing`\n\n- `maxdepth = nothing`\n\n- `expression_spec = nothing`\n\n- `populations = nothing`\n\n- `population_size = nothing`\n\n- `ncycles_per_iteration = nothing`\n\n- `elementwise_loss = nothing`\n\n- `loss_function = nothing`\n\n- `loss_function_expression = nothing`\n\n- `dimensional_constraint_penalty = nothing`\n\n- `parsimony = nothing`\n\n- `constraints = nothing`\n\n- `nested_constraints = nothing`\n\n- `complexity_of_operators = nothing`\n\n- `complexity_of_constants = nothing`\n\n- `complexity_of_variables = nothing`\n\n- `warmup_maxsize_by = nothing`\n\n- `adaptive_parsimony_scaling = nothing`\n\n- `operator_enum_constructor = nothing`\n\n- `mutation_weights = nothing`\n\n- `crossover_probability = nothing`\n\n- `annealing = nothing`\n\n- `alpha = nothing`\n\n- `tournament_selection_n = nothing`\n\n- `tournament_selection_p = nothing`\n\n- `early_stop_condition = nothing`\n\n- `batching = nothing`\n\n- `batch_size = nothing`\n\n- `dimensionless_constants_only = false`\n\n- `loss_scale = log`\n\n- `complexity_mapping = nothing`\n\n- `use_frequency = true`\n\n- `use_frequency_in_tournament = true`\n\n- `should_simplify = nothing`\n\n- `perturbation_factor = nothing`\n\n- `probability_negate_constant = nothing`\n\n- `skip_mutation_failures = true`\n\n- `optimizer_algorithm = Optim.BFGS{LineSearches.InitialStatic{Float64}, LineSearches.BackTracking{Float64, Int64}, Nothing, Nothing, Optim.Flat}(LineSearches.InitialStatic{Float64}\n alpha: Float64 1.0\n scaled: Bool false\n, LineSearches.BackTracking{Float64, Int64}\n c_1: Float64 0.0001\n ρ_hi: Float64 0.5\n ρ_lo: Float64 0.1\n iterations: Int64 1000\n order: Int64 3\n maxstep: Float64 Inf\n cache: Nothing nothing\n, nothing, nothing, Optim.Flat())`\n\n- `optimizer_nrestarts = 2`\n\n- `optimizer_probability = 0.14`\n\n- `optimizer_iterations = nothing`\n\n- `optimizer_f_calls_limit = nothing`\n\n- `optimizer_options = nothing`\n\n- `should_optimize_constants = true`\n\n- `migration = true`\n\n- `hof_migration = true`\n\n- `fraction_replaced = nothing`\n\n- `fraction_replaced_hof = nothing`\n\n- `topn = nothing`\n\n- `timeout_in_seconds = nothing`\n\n- `max_evals = nothing`\n\n- `input_stream = Base.TTY(RawFD(11) paused, 0 bytes waiting)`\n\n- `turbo = false`\n\n- `bumper = false`\n\n- `autodiff_backend = nothing`\n\n- `deterministic = false`\n\n- `seed = nothing`\n\n- `verbosity = nothing`\n\n- `print_precision = 5`\n\n- `progress = nothing`\n\n- `output_directory = nothing`\n\n- `save_to_file = true`\n\n- `bin_constraints = nothing`\n\n- `una_constraints = nothing`\n\n- `terminal_width = nothing`\n\n- `use_recorder = false`\n\n- `recorder_file = pysr_recorder.json`\n\n- `define_helper_functions = true`\n\n- `expression_type = nothing`\n\n- `expression_options = nothing`\n\n- `node_type = nothing`\n\n- `output_file = nothing`\n\n- `fast_cycle = false`\n\n- `npopulations = nothing`\n\n- `npop = nothing`\n\n- `niterations = 1`\n\n- `parallelism = multithreading`\n\n- `numprocs = nothing`\n\n- `procs = nothing`\n\n- `addprocs_function = nothing`\n\n- `heap_size_hint_in_bytes = nothing`\n\n- `worker_imports = nothing`\n\n- `logger = nothing`\n\n- `runtests = true`\n\n- `run_id = nothing`\n\n- `loss_type = Nothing`\n\n- `selection_method = choose_best`\n\n- `dimensions_type = 
DynamicQuantities.SymbolicDimensions{DynamicQuantities.FixedRational{Int32, 25200}}`\n\n""" -":name" = "MultitargetSRTestRegressor" -":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", 
\"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":OneHotEncoder"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -[SymbolicRegression.MultitargetSRRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`"
-":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":transform_scitype" = "`ScientificTypesBase.Unknown`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`true`"
-":is_pure_julia" = "`true`"
-":package_name" = "SymbolicRegression"
-":package_license" = "Apache-2.0"
-":load_path" = "SymbolicRegression.MLJInterfaceModule.MultitargetSRRegressor"
-":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb"
-":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl"
+[MLJModels.ContinuousEncoder] ":is_wrapper" = "`false`"
-":supports_weights" = "`true`"
+":hyperparameter_types" = "`(\"Bool\", \"Bool\")`"
+":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":reporting_operations" = "`()`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`"
+":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.Unsupervised`"
+":package_license" = "MIT"
+":prediction_type" = ":unknown"
+":load_path" = "MLJModels.ContinuousEncoder"
+":hyperparameters" = "`(:drop_last, :one_hot_ordered_factors)`"
+":is_pure_julia" = "`true`"
+":human_name" = "continuous encoder"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=MLJModels\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous`, retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `one_hot_ordered_factors=true` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping columns) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded.
Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Table`"
+":package_url" = "https://github.com/JuliaAI/MLJModels.jl"
+":package_name" = "MLJModels"
+":name" = "ContinuousEncoder"
+":target_in_fit" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":docstring" = """```\nMultitargetSRRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetSRRegressor(defaults=...)`.\n\nMulti-target Symbolic Regression regressor (`MultitargetSRRegressor`) conducts several searches for expressions that predict each target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm.
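Likewise, for the `ContinuousEncoder` entry added above: since its output is guaranteed to be all-`Continuous`, it can feed models that require purely continuous input. A minimal sketch, assuming the MultivariateStats.jl interface package is available; the data is illustrative only.

```julia
using MLJ

ContinuousEncoder = @load ContinuousEncoder pkg=MLJModels
PCA = @load PCA pkg=MultivariateStats   # assumed available

X = (grade=categorical(["A", "B", "A", "C"], ordered=true),
     n_devices=[3, 2, 4, 3],
     height=[1.85, 1.67, 1.5, 1.67])

# every feature becomes Continuous, so PCA's input requirement is met
pipe = ContinuousEncoder() |> PCA(maxoutdim=2)
mach = fit!(machine(pipe, X))
W = transform(mach, X)
```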
This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype\n\n`Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n\n * `y` is the target, which can be any table of target variables whose element scitype is `Continuous`; check the scitype with `schema(y)`. Units in columns of `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. The same weights are used for all targets.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of lists of trained models. The models chosen from each of these lists is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `operator_enum_constructor`: Constructor function to use for creating the operators enum. By default, OperatorEnum is used, but you can provide a different constructor like GenericOperatorEnum. The constructor must accept the keyword arguments 'binary*operators' and 'unary*operators'.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. 
You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `loss_scale`: Determines how loss values are scaled when computing scores. Options are:\n\n * `:log` (default): Uses logarithmic scaling of loss ratios. This mode requires non-negative loss values and is ideal for traditional loss functions that are always positive.\n * `:linear`: Uses direct differences between losses. This mode handles any loss values (including negative) and is useful for custom loss functions, especially those based on likelihoods.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. 
By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. 
Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. 
This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multithreading`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. 
Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to selection expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity. `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Vector{Int}}`: The complexity of each expression in each Pareto frontier.\n * `losses::Vector{Vector{L}}`: The loss of each expression in each Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{Vector{L}}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. 
A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\nX = (a=rand(100), b=rand(100), c=rand(100))\nY = (y1=(@. cos(X.c) * 2.1 - 0.9), y2=(@. X.a * X.b + X.c))\nmodel = MultitargetSRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, Y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equations used:\nr = report(mach)\nfor (output_index, (eq, i)) in enumerate(zip(r.equation_strings, r.best_idx))\n println(\"Equation used for \", output_index, \": \", eq[i])\nend\n```\n\nSee also [`SRRegressor`](@ref).\n""" -":name" = "MultitargetSRRegressor" -":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, 
SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":ContinuousEncoder"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[SymbolicRegression.SRRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, 
AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}, Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "SymbolicRegression" -":package_license" = "Apache-2.0" -":load_path" = "SymbolicRegression.MLJInterfaceModule.SRRegressor" -":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" -":package_url" = "https://github.com/MilesCranmer/SymbolicRegression.jl" +[MLJModels.UnivariateBoxCoxTransformer] ":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(defaults=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis to be used.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. 
Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity. You can override this at prediction time by passing a named tuple with keys `data` and `idx`.\n\n# Hyper-parameters\n\n * `defaults`: What set of defaults to use for `Options`. The default, `nothing`, will simply take the default options from the current version of SymbolicRegression. However, you may also select the defaults from an earlier version, such as `v\"0.24.5\"`.\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `operator_enum_constructor`: Constructor function to use for creating the operators enum. By default, OperatorEnum is used, but you can provide a different constructor like GenericOperatorEnum. The constructor must accept the keyword arguments 'binary*operators' and 'unary*operators'.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::AbstractExpressionNode{T}`, `dataset::Dataset{T}`, and `options::AbstractOptions`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. 
If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `loss_function_expression`: Similar to `loss_function`, but takes `AbstractExpression` instead of `AbstractExpressionNode` as its first argument. Useful for `TemplateExpressionSpec`.\n * `loss_scale`: Determines how loss values are scaled when computing scores. Options are:\n\n * `:log` (default): Uses logarithmic scaling of loss ratios. This mode requires non-negative loss values and is ideal for traditional loss functions that are always positive.\n * `:linear`: Uses direct differences between losses. This mode handles any loss values (including negative) and is useful for custom loss functions, especially those based on likelihoods.\n * `expression_spec::AbstractExpressionSpec`: A specification of what types of expressions to use in the search. For example, `ExpressionSpec()` (default). You can also see `TemplateExpressionSpec` and `ParametricExpressionSpec` for specialized cases.\n * `populations`: How many populations of equations to use.\n * `population_size`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to use of a variable, which can also be a vector indicating different per-variable complexity. By default, this is 1.\n * `complexity_mapping`: Alternatively, you can pass a function that takes the expression as input and returns the complexity. Make sure that this operates on `AbstractExpression` (and unpacks to `AbstractExpressionNode`), and returns an integer.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. 
Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `dimensionless_constants_only`: Whether to only allow dimensionless constants.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `bumper`: Whether to use Bumper.jl for faster evaluation. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is `Optim.BFGS(linesearch=LineSearches.BackTracking())`.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_probability`: Probability of performing optimization of constants at the end of a given iteration.\n * `optimizer_iterations`: How many optimization iterations to perform. This gets passed to `Optim.Options` as `iterations`. The default is 8.\n * `optimizer_f_calls_limit`: How many function calls to allow during optimization. This gets passed to `Optim.Options` as `f_calls_limit`. The default is `10_000`.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `autodiff_backend`: The backend to use for differentiation, which should be an instance of `AbstractADType` (see `ADTypes.jl`). Default is `nothing`, which means `Optim.jl` will estimate gradients (likely with finite differences). You can also pass a symbolic version of the backend type, such as `:Zygote` for Zygote, `:Enzyme`, etc. 
Most backends will not work, and many will never work due to incompatibilities, though support for some is gradually being added.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` (or any `AbstractMutationWeights`) should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `output_directory`: The base directory to save output files to. Files will be saved in a subdirectory according to the run ID. By default, this is `./outputs`.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `input_stream`: the stream to read user input from. By default, this is `stdin`. If you encounter issues with reading from `stdin`, like a hang, you can simply pass `devnull` to this argument.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. 
However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, all threads available to Julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and you are not passing `procs` manually, then processes will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `heap_size_hint_in_bytes::Union{Int,Nothing}=nothing`: On Julia 1.9+, you may set the `--heap-size-hint` flag on Julia processes, recommending garbage collection once a process is close to the recommended size. This is important for long-running distributed jobs where each process has an independent memory, and can help avoid out-of-memory errors. By default, this is set to `Sys.free_memory() / numprocs`.\n * `worker_imports::Union{Vector{Symbol},Nothing}=nothing`: If you want to import additional modules on each worker, pass them here as a vector of symbols. By default some of the extensions will automatically be loaded when needed.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `run_id::Union{String,Nothing}=nothing`: A unique identifier for the run. This will be used to store outputs from the run in the `outputs` directory. If not specified, a unique ID will be generated.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select an expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. 
By default, this maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To override this at prediction time, you can pass a named tuple with keys `data` and `idx` to `predict`. See the Operations section for details.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.SymbolicDimensions`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n * `predict(mach, (data=Xnew, idx=i))`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. By passing a named tuple with keys `data` and `idx`, you are able to specify the equation you wish to evaluate in `idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function. Override in `predict` by passing a named tuple with keys `data` and `idx`.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. 
A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n""" -":name" = "SRRegressor" -":human_name" = "Symbolic Regression via Evolutionary Search" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:defaults, :binary_operators, :unary_operators, :maxsize, :maxdepth, :expression_spec, :populations, :population_size, :ncycles_per_iteration, :elementwise_loss, :loss_function, :loss_function_expression, :dimensional_constraint_penalty, :parsimony, :constraints, :nested_constraints, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :warmup_maxsize_by, :adaptive_parsimony_scaling, :operator_enum_constructor, :mutation_weights, :crossover_probability, :annealing, :alpha, :tournament_selection_n, :tournament_selection_p, :early_stop_condition, :batching, :batch_size, :dimensionless_constants_only, :loss_scale, :complexity_mapping, :use_frequency, :use_frequency_in_tournament, :should_simplify, :perturbation_factor, :probability_negate_constant, :skip_mutation_failures, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_f_calls_limit, :optimizer_options, :should_optimize_constants, :migration, :hof_migration, :fraction_replaced, :fraction_replaced_hof, :topn, :timeout_in_seconds, :max_evals, :input_stream, :turbo, :bumper, :autodiff_backend, :deterministic, :seed, :verbosity, :print_precision, :progress, :output_directory, :save_to_file, :bin_constraints, :una_constraints, :terminal_width, :use_recorder, :recorder_file, :define_helper_functions, :expression_type, :expression_options, :node_type, :output_file, :fast_cycle, :npopulations, :npop, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :heap_size_hint_in_bytes, :worker_imports, :logger, :runtests, :run_id, :loss_type, :selection_method, :dimensions_type)`" -":hyperparameter_types" = "`(\"Union{Nothing, VersionNumber}\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, SymbolicRegression.CoreModule.ExpressionSpecModule.AbstractExpressionSpec}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Real}\", 
\"Union{Nothing, Real, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Type{<:DynamicExpressions.OperatorEnumModule.AbstractOperatorEnum}}\", \"Union{Nothing, SymbolicRegression.CoreModule.MutationWeightsModule.AbstractMutationWeights, NamedTuple, AbstractVector}\", \"Union{Nothing, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"Bool\", \"Symbol\", \"Union{Nothing, Function, SymbolicRegression.CoreModule.OptionsStructModule.ComplexityMapping}\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Bool\", \"Union{AbstractString, Optim.AbstractOptimizer}\", \"Int64\", \"AbstractFloat\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"IO\", \"Bool\", \"Bool\", \"Union{Nothing, ADTypes.AbstractADType, Symbol}\", \"Bool\", \"Any\", \"Union{Nothing, Integer}\", \"Integer\", \"Union{Nothing, Bool}\", \"Union{Nothing, String}\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, Integer}\", \"Bool\", \"AbstractString\", \"Bool\", \"Union{Nothing, Type{<:DynamicExpressions.ExpressionModule.AbstractExpression}}\", \"Union{Nothing, NamedTuple}\", \"Union{Nothing, Type{<:DynamicExpressions.NodeModule.AbstractExpressionNode}}\", \"Union{Nothing, AbstractString}\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Union{Nothing, Vector{Symbol}}\", \"Union{Nothing, SymbolicRegression.LoggingModule.AbstractSRLogger}\", \"Bool\", \"Union{Nothing, String}\", \"Type\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" +":hyperparameter_types" = "`(\"Int64\", \"Bool\")`" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJModels.ConstantClassifier] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" 
= "`Union{Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" +":fit_data_scitype" = "`Tuple{AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":load_path" = "MLJModels.ConstantClassifier" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":prediction_type" = ":unknown" +":load_path" = "MLJModels.UnivariateBoxCoxTransformer" +":hyperparameters" = "`(:n, :shift)`" +":is_pure_julia" = "`true`" +":human_name" = "single variable Box-Cox transformer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=MLJModels\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. 
Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `x` whose transformation, as learned by `mach`, is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" +":package_name" = "MLJModels" +":name" = "UnivariateBoxCoxTransformer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nConstantClassifier\n```\n\nThis \"dummy\" probabilistic predictor always returns the same distribution, irrespective of the provided input pattern. The distribution `d` returned is the `UnivariateFinite` distribution based on the frequency of classes observed in the training target data. So, `pdf(d, level)` is the relative frequency with which the training target takes on the value `level`. Use `predict_mode`, rather than `predict`, to obtain the training target mode. 
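For illustration, here is a minimal sketch of such a distribution, with made-up frequencies (`UnivariateFinite`, `coerce` and `pdf` are all exported by MLJ):\n\n```julia\nusing MLJ\n\n# a hypothetical training target with levels \"B\" and \"O\":\ny = coerce([\"B\", \"O\", \"B\", \"B\"], Multiclass)\n\n# the kind of distribution ConstantClassifier returns, built here from the\n# observed relative frequencies 3/4 and 1/4:\nd = UnivariateFinite(levels(y), [0.75, 0.25], pool=y)\n\npdf(d, \"B\") # 0.75\n```\n\n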
For more on the `UnivariateFinite` type, see the CategoricalDistributions.jl package.\n\nAlmost any reasonable model is expected to outperform `ConstantClassifier`, which is used almost exclusively for testing and establishing performance baselines.\n\nIn MLJ (or MLJModels) do `model = ConstantClassifier()` to construct an instance.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`)\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nNone.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` (which for this model are ignored). Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the mode of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `target_distribution`: The distribution fit to the supplied target data.\n\n# Examples\n\n```julia\nusing MLJ\n\nclf = ConstantClassifier()\n\nX, y = @load_crabs # a table and a categorical vector\nmach = machine(clf, X, y) |> fit!\n\nfitted_params(mach)\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\n# probabilistic predictions:\nyhat = predict(mach, Xnew)\nyhat[1]\n\n# raw probabilities:\npdf.(yhat, \"B\")\n\n# probability matrix:\nL = levels(y)\npdf(yhat, L)\n\n# point predictions:\npredict_mode(mach, Xnew)\n```\n\nSee also [`ConstantRegressor`](@ref)\n""" -":name" = "ConstantClassifier" -":human_name" = "constant classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`()`" -":hyperparameter_types" = "`()`" -":hyperparameter_ranges" = "`()`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateBoxCoxTransformer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":constructor" = "`nothing`" -[MLJModels.Standardizer] -":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" -":package_license" = "MIT" 
-":load_path" = "MLJModels.Standardizer" +[MLJModels.InteractionTransformer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Vector{Symbol}}\")`" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJModels.InteractionTransformer" +":hyperparameters" = "`(:order, :features)`" +":is_pure_julia" = "`true`" +":human_name" = "interaction transformer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=MLJModels\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interations generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" +":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table}`" ":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJModels" +":name" = "InteractionTransformer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStandardizer = @load Standardizer pkg=MLJModels\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names │ scitypes │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous │\n│ ordinal4 │ Continuous │\n│ nominal │ Multiclass{3} │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = 
[-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n""" -":name" = "Standardizer" -":human_name" = "standardizer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :count)`" -":hyperparameter_types" = "`(\"Union{Function, AbstractVector{Symbol}}\", \"Bool\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -[MLJModels.DeterministicConstantClassifier] -":input_scitype" = "`ScientificTypesBase.Table`" +[MLJModels.ConstantRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Type{D} where D<:Distributions.Sampleable\",)`" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`(nothing,)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":load_path" = "MLJModels.DeterministicConstantClassifier" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":prediction_type" = ":probabilistic" +":load_path" = "MLJModels.ConstantRegressor" +":hyperparameters" = "`(:distribution_type,)`" +":is_pure_julia" = "`true`" +":human_name" = "constant regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nConstantRegressor\n```\n\nThis \"dummy\" probabilistic predictor always returns the same distribution, irrespective of the provided input pattern. The distribution returned is the one of the type specified that best fits the training target data. Use `predict_mean` or `predict_median` to predict the mean or median values instead. 
If no distribution type is specified, a normal distribution is fit.\n\nAlmost any reasonable model is expected to outperform `ConstantRegressor`, which is used almost exclusively for testing and establishing performance baselines.\n\nIn MLJ (or MLJModels) do `model = ConstantRegressor()` or `model = ConstantRegressor(distribution_type=...)` to construct a model instance.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`)\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `distribution_type=Distributions.Normal`: The distribution to be fit to the target data. Must be a subtype of `Distributions.ContinuousUnivariateDistribution`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` (which for this model are ignored). Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: Return instead the means of the probabilistic predictions returned above.\n * `predict_median(mach, Xnew)`: Return instead the medians of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `target_distribution`: The distribution fit to the supplied target data.\n\n# Examples\n\n```julia\nusing MLJ\n\nX, y = make_regression(10, 2) # synthetic data: a table and vector\nregressor = ConstantRegressor()\nmach = machine(regressor, X, y) |> fit!\n\nfitted_params(mach)\n\nXnew, _ = make_regression(3, 2)\npredict(mach, Xnew)\npredict_mean(mach, Xnew)\n\n```\n\nSee also [`ConstantClassifier`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJModels" +":name" = "ConstantRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nDeterministicConstantClassifier\n```\n\nA model type for constructing a deterministic constant classifier, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantClassifier = @load DeterministicConstantClassifier pkg=MLJModels\n```\n\nDo `model = DeterministicConstantClassifier()` to construct an instance with default hyper-parameters. 
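\n\n# Example\n\nA minimal usage sketch (assuming the `@load_crabs` demo dataset that ships with MLJ):\n\n```julia\nusing MLJ\n\nDeterministicConstantClassifier = @load DeterministicConstantClassifier pkg=MLJModels\n\nclf = DeterministicConstantClassifier()\n\nX, y = @load_crabs # a table and a categorical vector\nmach = machine(clf, X, y) |> fit!\n\n# every prediction is the mode of the training target:\npredict(mach, X)[1:3]\n```\n\nSee also [`ConstantClassifier`](@ref).\n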
""" -":name" = "DeterministicConstantClassifier" -":human_name" = "deterministic constant classifier" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":fit", ":predict"] -":hyperparameters" = "`()`" -":hyperparameter_types" = "`()`" -":hyperparameter_ranges" = "`()`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJModels.UnivariateTimeTypeToContinuous] -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" -":package_license" = "MIT" -":load_path" = "MLJModels.UnivariateTimeTypeToContinuous" +[MLJModels.UnivariateDiscretizer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\",)`" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`(nothing,)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJModels.UnivariateDiscretizer" +":hyperparameters" = "`(:n_classes,)`" +":is_pure_julia" = "`true`" +":human_name" = "single variable discretizer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=MLJModels\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. 
Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitresult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +":package_name" = "MLJModels" +":name" = "UnivariateDiscretizer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" ":constructor" = "`nothing`" -[MLJModels.UnivariateTimeTypeToContinuous] -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" -":package_license" = "MIT" -":load_path" = "MLJModels.UnivariateTimeTypeToContinuous" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" ":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=MLJModels\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. 
If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142857142854\n```\n""" -":name" = "UnivariateTimeTypeToContinuous" -":human_name" = "single variable transformer that creates continuous representations of temporally typed data" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":transform"] -":hyperparameters" = "`(:zero_time, :step)`" -":hyperparameter_types" = "`(\"Union{Nothing, Dates.TimeType}\", \"Dates.Period\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" ":constructor" = "`nothing`" -[MLJModels.OneHotEncoder] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Table`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" -":package_license" = "MIT" -":load_path" = "MLJModels.OneHotEncoder" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +[MLJModels.BinaryThresholdPredictor] +":is_wrapper" = "`true`" +":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Float64\")`" +":package_uuid" = "" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJModels.BinaryThresholdPredictor" +":hyperparameters" = "`(:model, :threshold)`" +":is_pure_julia" = "`false`" +":human_name" = "binary threshold predictor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nBinaryThresholdPredictor(model; threshold=0.5)\n```\n\nWrap the `Probabilistic` model, `model`, assumed to support binary 
classification, as a `Deterministic` model, by applying the specified `threshold` to the positive class probability. In addition to conventional supervised classifiers, it can also be applied to outlier detection models that predict normalized scores - in the form of appropriate `UnivariateFinite` distributions - that is, models that subtype `AbstractProbabilisticUnsupervisedDetector` or `AbstractProbabilisticSupervisedDetector`.\n\nBy convention the positive class is the second class returned by `levels(y)`, where `y` is the target.\n\nIf `threshold=0.5` then calling `predict` on the wrapped model is equivalent to calling `predict_mode` on the atomic model.\n\n# Example\n\nBelow is an application to the well-known Pima Indian diabetes dataset, including optimization of the `threshold` parameter, with balanced accuracy as the objective. The target class distribution is 268 positives to 500 negatives.\n\nLoading the data:\n\n```julia\nusing MLJ, Random\nrng = Xoshiro(123)\n\ndiabetes = OpenML.load(43582)\noutcome, X = unpack(diabetes, ==(:Outcome), rng=rng);\ny = coerce(Int.(outcome), OrderedFactor);\n```\n\nChoosing a probabilistic classifier:\n\n```julia\nEvoTreesClassifier = @load EvoTreesClassifier\nprob_predictor = EvoTreesClassifier()\n```\n\nWrapping in `BinaryThresholdPredictor` to get a deterministic classifier with `threshold` as a new hyperparameter:\n\n```julia\npoint_predictor = BinaryThresholdPredictor(prob_predictor, threshold=0.6)\nmach = machine(point_predictor, X, y) |> fit!\npredict(mach, X)[1:3] # [0, 0, 0]\n```\n\nEstimating performance:\n\n```julia\nbalanced = BalancedAccuracy(adjusted=true)\ne = evaluate!(mach, resampling=CV(nfolds=6), measures=[balanced, accuracy])\ne.measurement[1] # 0.405 ± 0.089\n```\n\nWrapping in a tuning strategy to learn the `threshold` that maximizes balanced accuracy:\n\n```julia\nr = range(point_predictor, :threshold, lower=0.1, upper=0.9)\ntuned_point_predictor = TunedModel(\n point_predictor,\n tuning=RandomSearch(rng=rng),\n resampling=CV(nfolds=6),\n range = r,\n measure=balanced,\n n=30,\n)\nmach2 = machine(tuned_point_predictor, X, y) |> fit!\noptimized_point_predictor = report(mach2).best_model\noptimized_point_predictor.threshold # 0.260\npredict(mach2, X)[1:3] # [1, 1, 0]\n```\n\nEstimating the performance of the auto-thresholding model (nested resampling here):\n\n```julia\ne = evaluate!(mach2, resampling=CV(nfolds=6), measure=[balanced, accuracy])\ne.measurement[1] # 0.477 ± 0.110\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJModels" +":name" = "BinaryThresholdPredictor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneHotEncoder = @load OneHotEncoder pkg=MLJModels\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (column names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=true`: whether to drop the column corresponding to the final class of encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade__A │ Continuous │\n│ grade__B │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Count │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n""" -":name" = "OneHotEncoder" -":human_name" = "one-hot encoder" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" 
-":implemented_methods" = [":fit", ":fitted_params", ":transform", ":OneHotEncoder"] -":hyperparameters" = "`(:features, :drop_last, :ordered_factor, :ignore)`" -":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Bool\", \"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`MLJModels.BinaryThresholdPredictor`" -[MLJModels.ContinuousEncoder] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +[MLJModels.FillImputer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":load_path" = "MLJModels.ContinuousEncoder" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":prediction_type" = ":unknown" +":load_path" = "MLJModels.FillImputer" +":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`" +":is_pure_julia" = "`true`" +":human_name" = "fill imputer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=MLJModels\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. 
Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (its fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" ":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJModels" +":name" = "FillImputer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=unknown\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (features) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `ordered_factors=false` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping features) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. features can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but two just features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" -":name" = "ContinuousEncoder" -":human_name" = "continuous encoder" -":is_supervised" = 
"`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":ContinuousEncoder"] -":hyperparameters" = "`(:drop_last, :one_hot_ordered_factors)`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":FillImputer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -[MLJModels.UnivariateBoxCoxTransformer] -":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" -":package_license" = "MIT" -":load_path" = "MLJModels.UnivariateBoxCoxTransformer" +[MLJModels.DeterministicConstantRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`()`" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`()`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJModels.DeterministicConstantRegressor" +":hyperparameters" = "`()`" +":is_pure_julia" = "`true`" +":human_name" = "deterministic constant regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nDeterministicConstantRegressor\n```\n\nA model type for constructing a deterministic constant regressor, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantRegressor = @load DeterministicConstantRegressor pkg=MLJModels\n```\n\nDo `model = DeterministicConstantRegressor()` to construct an instance with default hyper-parameters. 
""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJModels" +":name" = "DeterministicConstantRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=MLJModels\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `z` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 
4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n""" -":name" = "UnivariateBoxCoxTransformer" -":human_name" = "single variable Box-Cox transformer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateBoxCoxTransformer"] -":hyperparameters" = "`(:n, :shift)`" -":hyperparameter_types" = "`(\"Int64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":fit", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJModels.InteractionTransformer] -":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" -":package_license" = "MIT" -":load_path" = "MLJModels.InteractionTransformer" +[MLJModels.UnivariateStandardizer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`()`" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`()`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Infinite}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJModels.UnivariateStandardizer" +":hyperparameters" = "`()`" +":is_pure_julia" = "`true`" +":human_name" = "single variable standardizer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nUnivariateStandardizer()\n```\n\nTransformer type for standardizing (whitening) single variable data.\n\nThis model may be deprecated in the future. Consider using [`Standardizer`](@ref), which handles both tabular *and* univariate data.\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" ":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJModels" +":name" = "UnivariateStandardizer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=unknown\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interations generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" -":name" = "InteractionTransformer" -":human_name" = "interaction transformer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Static`" -":implemented_methods" = [":clean!", ":transform"] -":hyperparameters" = "`(:order, :features)`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Vector{Symbol}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":constructor" = "`nothing`" -[MLJModels.ConstantRegressor] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" -":package_license" = "MIT" -":load_path" = "MLJModels.ConstantRegressor" +[MLJModels.UnivariateFillImputer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Function\", \"Function\", \"Function\")`" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = 
"`Tuple{Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}}`" +":output_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJModels.UnivariateFillImputer" +":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`" +":is_pure_julia" = "`true`" +":human_name" = "single variable fill imputer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateFillImputer = @load UnivariateFillImputer pkg=MLJModels\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to imputing `missing` values in a vector with a fixed value learned from the non-missing values of training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing 
tabular data, use [`FillImputer`](@ref).\n""" +":inverse_transform_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" ":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJModels" +":name" = "UnivariateFillImputer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nConstantRegressor\n```\n\nThis \"dummy\" probabilistic predictor always returns the same distribution, irrespective of the provided input pattern. The distribution returned is the one of the type specified that best fits the training target data. Use `predict_mean` or `predict_median` to predict the mean or median values instead. If not specified, a normal distribution is fit.\n\nAlmost any reasonable model is expected to outperform `ConstantRegressor` which is used almost exclusively for testing and establishing performance baselines.\n\nIn MLJ (or MLJModels) do `model = ConstantRegressor()` or `model = ConstantRegressor(distribution=...)` to construct a model instance.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`)\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `distribution_type=Distributions.Normal`: The distribution to be fit to the target data. Must be a subtype of `Distributions.ContinuousUnivariateDistribution`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` (which for this model are ignored). 
Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: Return instead the means of the probabilistic predictions returned above.\n * `predict_median(mach, Xnew)`: Return instead the medians of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `target_distribution`: The distribution fit to the supplied target data.\n\n# Examples\n\n```julia\nusing MLJ\n\nX, y = make_regression(10, 2) # synthetic data: a table and vector\nregressor = ConstantRegressor()\nmach = machine(regressor, X, y) |> fit!\n\nfitted_params(mach)\n\nXnew, _ = make_regression(3, 2)\npredict(mach, Xnew)\npredict_mean(mach, Xnew)\n\n```\n\nSee also [`ConstantClassifier`](@ref)\n""" -":name" = "ConstantRegressor" -":human_name" = "constant regressor" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":fitted_params", ":predict"] -":hyperparameters" = "`(:distribution_type,)`" -":hyperparameter_types" = "`(\"Type{D} where D<:Distributions.Sampleable\",)`" -":hyperparameter_ranges" = "`(nothing,)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":UnivariateFillImputer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":constructor" = "`nothing`" -[MLJModels.UnivariateDiscretizer] -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" -":package_license" = "MIT" -":load_path" = "MLJModels.UnivariateDiscretizer" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +[MLJGLMInterface.LinearBinaryClassifier] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":hyperparameter_types" = "`(\"Bool\", \"GLM.Link01\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" +":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}}, 
Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJGLMInterface.LinearBinaryClassifier" +":hyperparameters" = "`(:fit_intercept, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" +":is_pure_julia" = "`true`" +":human_name" = "linear binary classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLinearBinaryClassifier\n```\n\nA model type for constructing a linear binary classifier, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\n```\n\nDo `model = LinearBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearBinaryClassifier(fit_intercept=...)`.\n\n`LinearBinaryClassifier` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a binary target variable, with a user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor(2)` or `<:Multiclass(2)`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `link=GLM.LogitLink`: The function which links the linear prediction function to the probability of a particular outcome or class. This must have type `GLM.Link01`. Options include `GLM.LogitLink()`, `GLM.ProbitLink()`, `GLM.CloglogLink()`, `GLM.CauchitLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. 
Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features used during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.glm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport GLM # namespace must be available\n\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\nclf = LinearBinaryClassifier(fit_intercept=false, link=GLM.ProbitLink())\n\nX, y = @load_crabs\n\nmach = machine(clf, X, y) |> fit!\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\nyhat = predict(mach, Xnew) # probabilistic predictions\npdf(yhat, levels(y)) # probability matrix\np_B = pdf.(yhat, \"B\")\nclass_labels = predict_mode(mach, Xnew)\n\nfitted_params(mach).features\nfitted_params(mach).coef\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearCountRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/GLM.jl" +":package_name" = "GLM" +":name" = "LinearBinaryClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=MLJModels\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. 
Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitesult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n""" -":name" = "UnivariateDiscretizer" -":human_name" = "single variable discretizer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] -":hyperparameters" = "`(:n_classes,)`" -":hyperparameter_types" = "`(\"Int64\",)`" -":hyperparameter_ranges" = "`(nothing,)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[MLJModels.BinaryThresholdPredictor] -":input_scitype" = "`ScientificTypesBase.Unknown`" +[MLJGLMInterface.LinearCountRegressor] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Bool\", \"Distributions.Distribution\", \"GLM.Link\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" +":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, 
AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJGLMInterface.LinearCountRegressor" +":hyperparameters" = "`(:fit_intercept, :distribution, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" +":is_pure_julia" = "`true`" +":human_name" = "linear count regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLinearCountRegressor\n```\n\nA model type for constructing a linear count regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearCountRegressor = @load LinearCountRegressor pkg=GLM\n```\n\nDo `model = LinearCountRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearCountRegressor(fit_intercept=...)`.\n\n`LinearCountRegressor` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a `Count` target variable (non-negative, unbounded integer) with user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Count`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `distribution=Distributions.Poisson()`: The distribution which the residuals/errors of the model should fit.\n * `link=GLM.LogLink()`: The function which links the linear prediction function to the probability of a particular outcome or class. This should be one of the following: `GLM.IdentityLink()`, `GLM.InverseLink()`, `GLM.InverseSquareLink()`, `GLM.LogLink()`, `GLM.SqrtLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. 
This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.glm`. Note this points to training data. 
Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport MLJ.Distributions.Poisson\n\n# Generate some data whose target y looks Poisson when conditioned on\n# X:\nN = 10_000\nw = [1.0, -2.0, 3.0]\nmu(x) = exp(w'x) # mean for a log link function\nXmat = rand(N, 3)\nX = MLJ.table(Xmat)\ny = map(1:N) do i\n x = Xmat[i, :]\n rand(Poisson(mu(x)))\nend;\n\nCountRegressor = @load LinearCountRegressor pkg=GLM\nmodel = CountRegressor(fit_intercept=false)\nmach = machine(model, X, y)\nfit!(mach)\n\nXnew = MLJ.table(rand(3, 3))\nyhat = predict(mach, Xnew)\nyhat_point = predict_mean(mach, Xnew)\n\n# get coefficients approximating `w`:\njulia> fitted_params(mach).coef\n3-element Vector{Float64}:\n 0.9969008753103842\n -2.0255901752504775\n 3.014407534033522\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/GLM.jl" +":package_name" = "GLM" +":name" = "LinearCountRegressor" ":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJModels" -":package_license" = "unknown" -":load_path" = "MLJModels.BinaryThresholdPredictor" -":package_uuid" = "" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nBinaryThresholdPredictor(model; threshold=0.5)\n```\n\nWrap the `Probabilistic` model, `model`, assumed to support binary classification, as a `Deterministic` model, by applying the specified `threshold` to the positive class probability. In addition to conventional supervised classifiers, it can also be applied to outlier detection models that predict normalized scores - in the form of appropriate `UnivariateFinite` distributions - that is, models that subtype `AbstractProbabilisticUnsupervisedDetector` or `AbstractProbabilisticSupervisedDetector`.\n\nBy convention the positive class is the second class returned by `levels(y)`, where `y` is the target.\n\nIf `threshold=0.5` then calling `predict` on the wrapped model is equivalent to calling `predict_mode` on the atomic model.\n\n# Example\n\nBelow is an application to the well-known Pima Indian diabetes dataset, including optimization of the `threshold` parameter, with a high balanced accuracy the objective. 
The target class distribution is 500 positives to 268 negatives.\n\nLoading the data:\n\n```julia\nusing MLJ, Random\nrng = Xoshiro(123)\n\ndiabetes = OpenML.load(43582)\noutcome, X = unpack(diabetes, ==(:Outcome), rng=rng);\ny = coerce(Int.(outcome), OrderedFactor);\n```\n\nChoosing a probabilistic classifier:\n\n```julia\nEvoTreesClassifier = @load EvoTreesClassifier\nprob_predictor = EvoTreesClassifier()\n```\n\nWrapping in `TunedModel` to get a deterministic classifier with `threshold` as a new hyperparameter:\n\n```julia\npoint_predictor = BinaryThresholdPredictor(prob_predictor, threshold=0.6)\nXnew, _ = make_moons(3, rng=rng)\nmach = machine(point_predictor, X, y) |> fit!\npredict(mach, X)[1:3] # [0, 0, 0]\n```\n\nEstimating performance:\n\n```julia\nbalanced = BalancedAccuracy(adjusted=true)\ne = evaluate!(mach, resampling=CV(nfolds=6), measures=[balanced, accuracy])\ne.measurement[1] # 0.405 ± 0.089\n```\n\nWrapping in tuning strategy to learn `threshold` that maximizes balanced accuracy:\n\n```julia\nr = range(point_predictor, :threshold, lower=0.1, upper=0.9)\ntuned_point_predictor = TunedModel(\n point_predictor,\n tuning=RandomSearch(rng=rng),\n resampling=CV(nfolds=6),\n range = r,\n measure=balanced,\n n=30,\n)\nmach2 = machine(tuned_point_predictor, X, y) |> fit!\noptimized_point_predictor = report(mach2).best_model\noptimized_point_predictor.threshold # 0.260\npredict(mach2, X)[1:3] # [1, 1, 0]\n```\n\nEstimating the performance of the auto-thresholding model (nested resampling here):\n\n```julia\ne = evaluate!(mach2, resampling=CV(nfolds=6), measure=[balanced, accuracy])\ne.measurement[1] # 0.477 ± 0.110\n```\n""" -":name" = "BinaryThresholdPredictor" -":human_name" = "binary threshold predictor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [] -":hyperparameters" = "`(:model, :threshold)`" -":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Count}}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Count}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`BinaryThresholdPredictor`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" -[MLJModels.FillImputer] -":input_scitype" = "`ScientificTypesBase.Table`" -":output_scitype" = "`ScientificTypesBase.Table`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" -":package_license" = "MIT" -":load_path" = "MLJModels.FillImputer" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" 
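Both GLM entries above, and the `LinearRegressor` entry that follows, document a `report_keys` hyper-parameter without showing its use. Here is a minimal sketch, assuming MLJGLMInterface is installed so the `@load` below succeeds; the chosen keys and data are illustrative:

```julia
using MLJ

LinearCountRegressor = @load LinearCountRegressor pkg=GLM verbosity=0

# Keep only two of the documented report keys:
model = LinearCountRegressor(report_keys=[:deviance, :coef_table])

X = MLJ.table(rand(50, 3))
y = rand(0:5, 50)  # integer vector, so a `Count` target
mach = fit!(machine(model, X, y), verbosity=0)

keys(report(mach))  # expect just the two requested keys
```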
+[MLJGLMInterface.LinearRegressor]
 ":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
-":supports_class_weights" = "`false`"
-":supports_online" = "`false`"
-":docstring" = """```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=unknown\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose features each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (features) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (it's fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n     b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n     c = [1, 1, 2, missing, 3])\n\nschema(X)\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes                      │\n├───────┼───────────────────────────────┤\n│ a     │ Union{Missing, Continuous}    │\n│ b     │ Union{Missing, Multiclass{2}} │\n│ c     │ Union{Missing, Count}         │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\n(filler = 2.0,)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n  :a => 2.0\n  :b => \"y\"\n  :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n"""
-":name" = "FillImputer"
-":human_name" = "fill imputer"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.Unsupervised`"
-":implemented_methods" = [":fit", ":fitted_params", ":transform", ":FillImputer"]
-":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`"
-":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`"
+":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Symbol}\", \"Union{Nothing, AbstractVector{Symbol}}\")`"
+":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
 ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
-":supports_training_losses" = "`false`"
-":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
 ":reporting_operations" = "`()`"
-":constructor" = "`nothing`"
-
-[MLJModels.DeterministicConstantRegressor]
-":input_scitype" = "`ScientificTypesBase.Table`"
+":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`"
 ":output_scitype" = "`ScientificTypesBase.Unknown`"
-":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
-":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{ScientificTypesBase.Continuous}}`"
-":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
-":transform_scitype" = "`ScientificTypesBase.Unknown`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.Probabilistic`"
+":package_license" = "MIT"
+":prediction_type" = ":probabilistic"
+":load_path" = "MLJGLMInterface.LinearRegressor"
+":hyperparameters" = "`(:fit_intercept, :dropcollinear, :offsetcol, :report_keys)`"
+":is_pure_julia" = "`true`"
+":human_name" = "linear regressor"
+":is_supervised" = "`true`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=GLM\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(fit_intercept=...)`.\n\n`LinearRegressor` assumes the target is a continuous variable whose conditional distribution is normal with constant variance, and whose expected value is a linear combination of the features (identity link function). Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n * `w`: is a vector of `Real` per-observation weights\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `dropcollinear=false`: Whether to drop features in the training data to ensure linear independence. If `true`, only the first of each set of linearly-dependent features is used. The coefficient for redundant linearly dependent features is `0.0` and all associated statistics are set to `NaN`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nWhen all keys are enabled in `report_keys`, the following fields are available in `report(mach)`:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nLinearRegressor = @load LinearRegressor pkg=GLM\nglm = LinearRegressor()\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(glm, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\nyhat_point = predict_mean(mach, Xnew) # point predictions\n\nfitted_params(mach).features\nfitted_params(mach).coef # x1, x2, intercept\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearCountRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n"""
 ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
+":package_url" = "https://github.com/JuliaStats/GLM.jl"
+":package_name" = "GLM"
+":name" = "LinearRegressor"
 ":target_in_fit" = "`true`"
-":is_pure_julia" = "`true`"
-":package_name" = "MLJModels"
-":package_license" = "MIT"
-":load_path" = "MLJModels.DeterministicConstantRegressor"
-":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
-":package_url" = "https://github.com/JuliaAI/MLJModels.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nDeterministicConstantRegressor\n```\n\nA model type for constructing a deterministic constant regressor, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantRegressor = @load DeterministicConstantRegressor pkg=MLJModels\n```\n\nDo `model = DeterministicConstantRegressor()` to construct an instance with default hyper-parameters. """
-":name" = "DeterministicConstantRegressor"
-":human_name" = "deterministic constant regressor"
-":is_supervised" = "`true`"
-":prediction_type" = ":deterministic"
-":abstract_type" = "`MLJModelInterface.Deterministic`"
-":implemented_methods" = [":fit", ":predict"]
-":hyperparameters" = "`()`"
-":hyperparameter_types" = "`()`"
-":hyperparameter_ranges" = "`()`"
-":iteration_parameter" = "`nothing`"
+":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"]
+":deep_properties" = "`()`"
+":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`"
+":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`true`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
+":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`"
+":transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":constructor" = "`nothing`"
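The `LinearRegressor` entry above is registered as probabilistic (`":prediction_type" = ":probabilistic"`): `predict` returns distributions, while `predict_mean` and `predict_median` return point values. A minimal sketch of that distinction, using only the API quoted in the docstring above (the data is synthetic and illustrative):

```julia
using MLJ

# Load the model registered above (requires MLJGLMInterface in the environment).
LinearRegressor = @load LinearRegressor pkg=GLM

X, y = make_regression(100, 2)              # synthetic data, as in the docstring
mach = machine(LinearRegressor(), X, y) |> fit!

yhat = predict(mach, X)    # probabilistic: a vector of Normal distributions
predict_mean(mach, X)      # point predictions: the mean of each distribution
predict_median(mach, X)    # point predictions: the median of each distribution
```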
= "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nUnivariateStandardizer()\n```\n\nTransformer type for standardizing (whitening) single variable data.\n\nThis model may be deprecated in the future. Consider using [`Standardizer`](@ref), which handles both tabular *and* univariate data.\n""" -":name" = "UnivariateStandardizer" -":human_name" = "single variable discretizer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform"] -":hyperparameters" = "`()`" +[OneRule.OneRuleClassifier] +":constructor" = "`nothing`" ":hyperparameter_types" = "`()`" +":package_uuid" = "90484964-6d6a-4979-af09-8657dbed84ff" ":hyperparameter_ranges" = "`()`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" ":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[MLJModels.UnivariateFillImputer] -":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" -":output_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":inverse_transform_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "MLJModels" -":package_license" = "MIT" -":load_path" = "MLJModels.UnivariateFillImputer" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateFillImputer = @load UnivariateFillImputer pkg=MLJModels\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to imputing `missing` values in a vector with a fixed value learned from the non-missing values of training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n""" -":name" = "UnivariateFillImputer" -":human_name" = "single variable fill imputer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":UnivariateFillImputer"] -":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`" -":hyperparameter_types" = "`(\"Function\", \"Function\", \"Function\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[OneRule.OneRuleClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = 
"`AbstractVector{<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "OneRule" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "OneRule.OneRuleClassifier" -":package_uuid" = "90484964-6d6a-4979-af09-8657dbed84ff" +":hyperparameters" = "`()`" +":is_pure_julia" = "`true`" +":human_name" = "one rule classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nOneRuleClassifier\n```\n\nA model type for constructing a one rule classifier, based on [OneRule.jl](https://github.com/roland-KA/OneRule.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneRuleClassifier = @load OneRuleClassifier pkg=OneRule\n```\n\nDo `model = OneRuleClassifier()` to construct an instance with default hyper-parameters. \n\n`OneRuleClassifier` implements the OneRule method for classification by Robert Holte (\"Very simple classification rules perform well on most commonly used datasets\" in: Machine Learning 11.1 (1993), pp. 63-90). \n\n```\nFor more information see:\n\n- Witten, Ian H., Eibe Frank, and Mark A. Hall. \n Data Mining Practical Machine Learning Tools and Techniques Third Edition. \n Morgan Kaufmann, 2017, pp. 93-96.\n- [Machine Learning - (One|Simple) Rule](https://datacadamia.com/data_mining/one_rule)\n- [OneRClassifier - One Rule for Classification](http://rasbt.github.io/mlxtend/user_guide/classifier/OneRClassifier/)\n```\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X, y) where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Multiclass`, `OrderedFactor`, or `<:Finite`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nThis classifier has no hyper-parameters.\n\n# Operations\n\n * `predict(mach, Xnew)`: return (deterministic) predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree (a `OneTree`) returned by the core OneTree.jl algorithm\n * `all_classes`: all classes (i.e. 
levels) of the target (used also internally to transfer `levels`-information to `predict`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `tree`: The `OneTree` created based on the training data\n * `nrules`: The number of rules `tree` contains\n * `error_rate`: fraction of wrongly classified instances\n * `error_count`: number of wrongly classified instances\n * `classes_seen`: list of target classes actually observed in training\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\n\nORClassifier = @load OneRuleClassifier pkg=OneRule\n\norc = ORClassifier()\n\noutlook = [\"sunny\", \"sunny\", \"overcast\", \"rainy\", \"rainy\", \"rainy\", \"overcast\", \"sunny\", \"sunny\", \"rainy\", \"sunny\", \"overcast\", \"overcast\", \"rainy\"]\ntemperature = [\"hot\", \"hot\", \"hot\", \"mild\", \"cool\", \"cool\", \"cool\", \"mild\", \"cool\", \"mild\", \"mild\", \"mild\", \"hot\", \"mild\"]\nhumidity = [\"high\", \"high\", \"high\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"high\"]\nwindy = [\"false\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"true\"]\n\nweather_data = (outlook = outlook, temperature = temperature, humidity = humidity, windy = windy)\nplay_data = [\"no\", \"no\", \"yes\", \"yes\", \"yes\", \"no\", \"yes\", \"no\", \"yes\", \"yes\", \"yes\", \"yes\", \"yes\", \"no\"]\n\nweather = coerce(weather_data, Textual => Multiclass)\nplay = coerce(play, Multiclass)\n\nmach = machine(orc, weather, play)\nfit!(mach)\n\nyhat = MLJ.predict(mach, weather) # in a real context 'new' `weather` data would be used\none_tree = fitted_params(mach).tree\nreport(mach).error_rate\n```\n\nSee also [OneRule.jl](https://github.com/roland-KA/OneRule.jl).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/roland-KA/OneRule.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OneRule" +":name" = "OneRuleClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nOneRuleClassifier\n```\n\nA model type for constructing a one rule classifier, based on [OneRule.jl](https://github.com/roland-KA/OneRule.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneRuleClassifier = @load OneRuleClassifier pkg=OneRule\n```\n\nDo `model = OneRuleClassifier()` to construct an instance with default hyper-parameters. \n\n`OneRuleClassifier` implements the OneRule method for classification by Robert Holte (\"Very simple classification rules perform well on most commonly used datasets\" in: Machine Learning 11.1 (1993), pp. 63-90). \n\n```\nFor more information see:\n\n- Witten, Ian H., Eibe Frank, and Mark A. Hall. \n Data Mining Practical Machine Learning Tools and Techniques Third Edition. \n Morgan Kaufmann, 2017, pp. 
93-96.\n- [Machine Learning - (One|Simple) Rule](https://datacadamia.com/data_mining/one_rule)\n- [OneRClassifier - One Rule for Classification](http://rasbt.github.io/mlxtend/user_guide/classifier/OneRClassifier/)\n```\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X, y) where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Multiclass`, `OrderedFactor`, or `<:Finite`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nThis classifier has no hyper-parameters.\n\n# Operations\n\n * `predict(mach, Xnew)`: return (deterministic) predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree (a `OneTree`) returned by the core OneTree.jl algorithm\n * `all_classes`: all classes (i.e. levels) of the target (used also internally to transfer `levels`-information to `predict`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `tree`: The `OneTree` created based on the training data\n * `nrules`: The number of rules `tree` contains\n * `error_rate`: fraction of wrongly classified instances\n * `error_count`: number of wrongly classified instances\n * `classes_seen`: list of target classes actually observed in training\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\n\nORClassifier = @load OneRuleClassifier pkg=OneRule\n\norc = ORClassifier()\n\noutlook = [\"sunny\", \"sunny\", \"overcast\", \"rainy\", \"rainy\", \"rainy\", \"overcast\", \"sunny\", \"sunny\", \"rainy\", \"sunny\", \"overcast\", \"overcast\", \"rainy\"]\ntemperature = [\"hot\", \"hot\", \"hot\", \"mild\", \"cool\", \"cool\", \"cool\", \"mild\", \"cool\", \"mild\", \"mild\", \"mild\", \"hot\", \"mild\"]\nhumidity = [\"high\", \"high\", \"high\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"high\"]\nwindy = [\"false\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"true\"]\n\nweather_data = (outlook = outlook, temperature = temperature, humidity = humidity, windy = windy)\nplay_data = [\"no\", \"no\", \"yes\", \"yes\", \"yes\", \"no\", \"yes\", \"no\", \"yes\", \"yes\", \"yes\", \"yes\", \"yes\", \"no\"]\n\nweather = coerce(weather_data, Textual => Multiclass)\nplay = coerce(play, Multiclass)\n\nmach = machine(orc, weather, play)\nfit!(mach)\n\nyhat = MLJ.predict(mach, weather) # in a real context 'new' `weather` data would be used\none_tree = fitted_params(mach).tree\nreport(mach).error_rate\n```\n\nSee also [OneRule.jl](https://github.com/roland-KA/OneRule.jl).\n""" -":name" = "OneRuleClassifier" -":human_name" = "one rule classifier" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`()`" -":hyperparameter_types" = "`()`" -":hyperparameter_ranges" = "`()`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = 
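Entries like the `OneRuleClassifier` block above are what MLJ's model-query tools read back. As a hedged sketch (assuming MLJ's registry lookup `info(name; pkg=...)`; nothing here is specific to this patch), the recorded traits can be inspected without loading OneRule.jl itself:

```julia
using MLJ

# Query registry metadata without importing the implementing package;
# `info` returns the traits recorded in Metadata.toml for this model.
meta = info("OneRuleClassifier", pkg="OneRule")
meta.is_supervised       # true
meta.target_scitype      # AbstractVector{<:Finite}
meta.prediction_type     # :deterministic
```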
"`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [OutlierDetectionPython.MCDDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "OutlierDetectionPython" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionPython.MCDDetector" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameters" = "`(:store_precision, :assume_centered, :support_fraction, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "mcd detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMCDDetector(store_precision = true,\n assume_centered = false,\n support_fraction = nothing,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OutlierDetectionPython" +":name" = "MCDDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMCDDetector(store_precision = true,\n assume_centered = false,\n support_fraction = nothing,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd)\n""" -":name" = "MCDDetector" -":human_name" = "mcd detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = 
"`MLJModelInterface.UnsupervisedDetector`" ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:store_precision, :assume_centered, :support_fraction, :random_state)`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [OutlierDetectionPython.COPODDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\",)`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing,)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "OutlierDetectionPython" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionPython.COPODDetector" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameters" = "`(:n_jobs,)`" +":is_pure_julia" = "`false`" +":human_name" = "copod detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nCOPODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OutlierDetectionPython" +":name" = "COPODDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" 
":supports_online" = "`false`" -":docstring" = """```\nCOPODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod)\n""" -":name" = "COPODDetector" -":human_name" = "copod detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:n_jobs,)`" -":hyperparameter_types" = "`(\"Integer\",)`" -":hyperparameter_ranges" = "`(nothing,)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [OutlierDetectionPython.HBOSDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "OutlierDetectionPython" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionPython.HBOSDetector" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameters" = "`(:n_bins, :alpha, :tol)`" +":is_pure_julia" = "`false`" +":human_name" = "hbos detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nHBOSDetector(n_bins = 10,\n alpha = 0.1,\n tol = 0.5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos)\n""" +":inverse_transform_scitype" = 
"`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OutlierDetectionPython" +":name" = "HBOSDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nHBOSDetector(n_bins = 10,\n alpha = 0.1,\n tol = 0.5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos)\n""" -":name" = "HBOSDetector" -":human_name" = "hbos detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:n_bins, :alpha, :tol)`" -":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [OutlierDetectionPython.IForestDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\", \"Integer\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "OutlierDetectionPython" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionPython.IForestDetector" -":package_uuid" = 
"2449c660-d36c-460e-a68b-92ab3c865b3e" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nIForestDetector(n_estimators = 100,\n max_samples = \"auto\",\n max_features = 1.0\n bootstrap = false,\n random_state = nothing,\n verbose = 0,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest)\n""" -":name" = "IForestDetector" +":hyperparameters" = "`(:n_estimators, :max_samples, :max_features, :bootstrap, :random_state, :verbose, :n_jobs)`" +":is_pure_julia" = "`false`" ":human_name" = "i forest detector" ":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:n_estimators, :max_samples, :max_features, :bootstrap, :random_state, :verbose, :n_jobs)`" -":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\", \"Integer\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" +":docstring" = """```\nIForestDetector(n_estimators = 100,\n max_samples = \"auto\",\n max_features = 1.0\n bootstrap = false,\n random_state = nothing,\n verbose = 0,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "IForestDetector" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [OutlierDetectionPython.SOSDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"String\", \"Real\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" 
+":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "OutlierDetectionPython" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionPython.SOSDetector" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameters" = "`(:perplexity, :metric, :eps)`" +":is_pure_julia" = "`false`" +":human_name" = "sos detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSOSDetector(perplexity = 4.5,\n metric = \"minkowski\",\n eps = 1e-5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OutlierDetectionPython" +":name" = "SOSDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nSOSDetector(perplexity = 4.5,\n metric = \"minkowski\",\n eps = 1e-5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos)\n""" -":name" = "SOSDetector" -":human_name" = "sos detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:perplexity, :metric, :eps)`" -":hyperparameter_types" = "`(\"Real\", \"String\", \"Real\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [OutlierDetectionPython.ABODDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"String\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "OutlierDetectionPython" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionPython.ABODDetector" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameters" = "`(:n_neighbors, :method)`" +":is_pure_julia" = "`false`" +":human_name" = "abod detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nABODDetector(n_neighbors = 5,\n method = \"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OutlierDetectionPython" +":name" = "ABODDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nABODDetector(n_neighbors = 5,\n method = \"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod)\n""" -":name" = "ABODDetector" -":human_name" = "abod detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:n_neighbors, :method)`" -":hyperparameter_types" = "`(\"Integer\", \"String\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [OutlierDetectionPython.LOFDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\", \"Bool\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "OutlierDetectionPython" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionPython.LOFDetector" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameters" = "`(:n_neighbors, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs, :novelty)`" +":is_pure_julia" = "`false`" +":human_name" = "lof detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLOFDetector(n_neighbors = 5,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1,\n novelty = true)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OutlierDetectionPython" +":name" = "LOFDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLOFDetector(n_neighbors = 5,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1,\n novelty = true)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof)\n""" -":name" = "LOFDetector" -":human_name" = "lof detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" 
":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:n_neighbors, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs, :novelty)`" -":hyperparameter_types" = "`(\"Integer\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [OutlierDetectionPython.PCADetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"String\", \"Real\", \"Union{Integer, String}\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "OutlierDetectionPython" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionPython.PCADetector" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameters" = "`(:n_components, :n_selected_components, :copy, :whiten, :svd_solver, :tol, :iterated_power, :standardization, :weighted, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "pca detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nPCADetector(n_components = nothing,\n n_selected_components = nothing,\n copy = true,\n whiten = false,\n svd_solver = \"auto\",\n tol = 0.0\n iterated_power = 
\"auto\",\n standardization = true,\n weighted = true,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OutlierDetectionPython" +":name" = "PCADetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nPCADetector(n_components = nothing,\n n_selected_components = nothing,\n copy = true,\n whiten = false,\n svd_solver = \"auto\",\n tol = 0.0\n iterated_power = \"auto\",\n standardization = true,\n weighted = true,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca)\n""" -":name" = "PCADetector" -":human_name" = "pca detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:n_components, :n_selected_components, :copy, :whiten, :svd_solver, :tol, :iterated_power, :standardization, :weighted, :random_state)`" -":hyperparameter_types" = "`(\"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"String\", \"Real\", \"Union{Integer, String}\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [OutlierDetectionPython.INNEDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Union{Nothing, Integer}\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, 

 [OutlierDetectionPython.INNEDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Union{Nothing, Integer}\")`"
+":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`false`"
-":package_name" = "OutlierDetectionPython"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionPython.INNEDetector"
-":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameters" = "`(:n_estimators, :max_samples, :random_state)`"
+":is_pure_julia" = "`false`"
+":human_name" = "inne detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nINNEDetector(n_estimators=200,\n             max_samples=\"auto\",\n             random_state=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionPython"
+":name" = "INNEDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nINNEDetector(n_estimators=200,\n             max_samples=\"auto\",\n             random_state=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne)\n"""
-":name" = "INNEDetector"
-":human_name" = "inne detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
-":hyperparameters" = "`(:n_estimators, :max_samples, :random_state)`"
-":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Union{Nothing, Integer}\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-":constructor" = "`nothing`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":is_wrapper" = "`false`"

 [OutlierDetectionPython.OCSVMDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"String\", \"Integer\", \"Union{Real, String}\", \"Real\", \"Real\", \"Real\", \"Bool\", \"Integer\", \"Bool\", \"Integer\")`"
+":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`false`"
-":package_name" = "OutlierDetectionPython"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionPython.OCSVMDetector"
-":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameters" = "`(:kernel, :degree, :gamma, :coef0, :tol, :nu, :shrinking, :cache_size, :verbose, :max_iter)`"
+":is_pure_julia" = "`false`"
+":human_name" = "ocsvm detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nOCSVMDetector(kernel = \"rbf\",\n              degree = 3,\n              gamma = \"auto\",\n              coef0 = 0.0,\n              tol = 0.001,\n              nu = 0.5,\n              shrinking = true,\n              cache_size = 200,\n              verbose = false,\n              max_iter = -1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionPython"
+":name" = "OCSVMDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nOCSVMDetector(kernel = \"rbf\",\n              degree = 3,\n              gamma = \"auto\",\n              coef0 = 0.0,\n              tol = 0.001,\n              nu = 0.5,\n              shrinking = true,\n              cache_size = 200,\n              verbose = false,\n              max_iter = -1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm)\n"""
-":name" = "OCSVMDetector"
-":human_name" = "ocsvm detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
-":hyperparameters" = "`(:kernel, :degree, :gamma, :coef0, :tol, :nu, :shrinking, :cache_size, :verbose, :max_iter)`"
-":hyperparameter_types" = "`(\"String\", \"Integer\", \"Union{Real, String}\", \"Real\", \"Real\", \"Real\", \"Bool\", \"Integer\", \"Bool\", \"Integer\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-":constructor" = "`nothing`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":is_wrapper" = "`false`"

 [OutlierDetectionPython.ECODDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Any\",)`"
+":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameter_ranges" = "`(nothing,)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`false`"
-":package_name" = "OutlierDetectionPython"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionPython.ECODDetector"
-":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameters" = "`(:n_jobs,)`"
+":is_pure_julia" = "`false`"
+":human_name" = "ecod detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nECODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionPython"
+":name" = "ECODDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nECODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod)\n"""
-":name" = "ECODDetector"
-":human_name" = "ecod detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:n_jobs,)`" -":hyperparameter_types" = "`(\"Any\",)`" -":hyperparameter_ranges" = "`(nothing,)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [OutlierDetectionPython.SODDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Real\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "OutlierDetectionPython" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionPython.SODDetector" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameters" = "`(:n_neighbors, :ref_set, :alpha)`" +":is_pure_julia" = "`false`" +":human_name" = "sod detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSODDetector(n_neighbors = 5,\n ref_set = 10,\n alpha = 0.8)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OutlierDetectionPython" +":name" = "SODDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nSODDetector(n_neighbors = 5,\n ref_set = 10,\n alpha = 
0.8)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod)\n""" -":name" = "SODDetector" -":human_name" = "sod detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:n_neighbors, :ref_set, :alpha)`" -":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Real\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [OutlierDetectionPython.LODADetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Integer\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "OutlierDetectionPython" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionPython.LODADetector" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameters" = "`(:n_bins, :n_random_cuts)`" +":is_pure_julia" = "`false`" +":human_name" = "loda detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLODADetector(n_bins = 10,\n n_random_cuts = 100)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" 
= "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OutlierDetectionPython" +":name" = "LODADetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLODADetector(n_bins = 10,\n n_random_cuts = 100)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda)\n""" -":name" = "LODADetector" -":human_name" = "loda detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:n_bins, :n_random_cuts)`" -":hyperparameter_types" = "`(\"Integer\", \"Integer\")`" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [OutlierDetectionPython.KDEDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"String\", \"Integer\", \"String\", \"Any\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "OutlierDetectionPython" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" +":prediction_type" = ":unknown" ":load_path" = "OutlierDetectionPython.KDEDetector" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameters" = "`(:bandwidth, :algorithm, :leaf_size, :metric, :metric_params)`" +":is_pure_julia" = 
"`false`" +":human_name" = "kde detector" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nKDEDetector(bandwidth=1.0,\n algorithm=\"auto\",\n leaf_size=30,\n metric=\"minkowski\",\n metric_params=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "OutlierDetectionPython" +":name" = "KDEDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nKDEDetector(bandwidth=1.0,\n algorithm=\"auto\",\n leaf_size=30,\n metric=\"minkowski\",\n metric_params=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde)\n""" -":name" = "KDEDetector" -":human_name" = "kde detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:bandwidth, :algorithm, :leaf_size, :metric, :metric_params)`" -":hyperparameter_types" = "`(\"Real\", \"String\", \"Integer\", \"String\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" ":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" [OutlierDetectionPython.CDDetector] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"PythonCall.Core.Py\",)`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing,)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, 
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`false`"
-":package_name" = "OutlierDetectionPython"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionPython.CDDetector"
-":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameters" = "`(:model,)`"
+":is_pure_julia" = "`false`"
+":human_name" = "cd detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nCDDetector(whitening = true,\n           rule_of_thumb = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionPython"
+":name" = "CDDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nCDDetector(whitening = true,\n           rule_of_thumb = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd)\n"""
-":name" = "CDDetector"
-":human_name" = "cd detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
-":hyperparameters" = "`(:model,)`"
-":hyperparameter_types" = "`(\"PythonCall.Core.Py\",)`"
-":hyperparameter_ranges" = "`(nothing,)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-":constructor" = "`nothing`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":is_wrapper" = "`false`"

 [OutlierDetectionPython.KNNDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\")`"
+":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`false`"
-":package_name" = "OutlierDetectionPython"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionPython.KNNDetector"
-":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameters" = "`(:n_neighbors, :method, :radius, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs)`"
+":is_pure_julia" = "`false`"
+":human_name" = "knn detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nKNNDetector(n_neighbors = 5,\n            method = \"largest\",\n            radius = 1.0,\n            algorithm = \"auto\",\n            leaf_size = 30,\n            metric = \"minkowski\",\n            p = 2,\n            metric_params = nothing,\n            n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionPython"
+":name" = "KNNDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nKNNDetector(n_neighbors = 5,\n            method = \"largest\",\n            radius = 1.0,\n            algorithm = \"auto\",\n            leaf_size = 30,\n            metric = \"minkowski\",\n            p = 2,\n            metric_params = nothing,\n            n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn)\n"""
-":name" = "KNNDetector"
-":human_name" = "knn detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
-":hyperparameters" = "`(:n_neighbors, :method, :radius, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs)`"
-":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-":constructor" = "`nothing`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":is_wrapper" = "`false`"

 [OutlierDetectionPython.GMMDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"Real\", \"Integer\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Bool\")`"
+":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`false`"
-":package_name" = "OutlierDetectionPython"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionPython.GMMDetector"
-":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameters" = "`(:n_components, :covariance_type, :tol, :reg_covar, :max_iter, :n_init, :init_params, :random_state, :warm_start)`"
+":is_pure_julia" = "`false`"
+":human_name" = "gmm detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nGMMDetector(n_components=1,\n            covariance_type=\"full\",\n            tol=0.001,\n            reg_covar=1e-06,\n            max_iter=100,\n            n_init=1,\n            init_params=\"kmeans\",\n            weights_init=None,\n            means_init=None,\n            precisions_init=None,\n            random_state=None,\n            warm_start=False)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionPython"
+":name" = "GMMDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nGMMDetector(n_components=1,\n            covariance_type=\"full\",\n            tol=0.001,\n            reg_covar=1e-06,\n            max_iter=100,\n            n_init=1,\n            init_params=\"kmeans\",\n            weights_init=None,\n            means_init=None,\n            precisions_init=None,\n            random_state=None,\n            warm_start=False)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm)\n"""
-":name" = "GMMDetector"
-":human_name" = "gmm detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
-":hyperparameters" = "`(:n_components, :covariance_type, :tol, :reg_covar, :max_iter, :n_init, :init_params, :random_state, :warm_start)`"
-":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"Real\", \"Integer\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Bool\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-":constructor" = "`nothing`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":is_wrapper" = "`false`"

 [OutlierDetectionPython.COFDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Integer\", \"String\")`"
+":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`false`"
-":package_name" = "OutlierDetectionPython"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionPython.COFDetector"
-":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameters" = "`(:n_neighbors, :method)`"
+":is_pure_julia" = "`false`"
+":human_name" = "cof detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nCOFDetector(n_neighbors = 5,\n            method=\"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionPython"
+":name" = "COFDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nCOFDetector(n_neighbors = 5,\n            method=\"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof)\n"""
-":name" = "COFDetector"
-":human_name" = "cof detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
-":hyperparameters" = "`(:n_neighbors, :method)`"
-":hyperparameter_types" = "`(\"Integer\", \"String\")`"
-":hyperparameter_ranges" = "`(nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-":constructor" = "`nothing`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":is_wrapper" = "`false`"

 [OutlierDetectionPython.CBLOFDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\")`"
+":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`false`"
-":package_name" = "OutlierDetectionPython"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionPython.CBLOFDetector"
-":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameters" = "`(:n_clusters, :alpha, :beta, :use_weights, :random_state, :n_jobs)`"
+":is_pure_julia" = "`false`"
+":human_name" = "cblof detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nCBLOFDetector(n_clusters = 8,\n              alpha = 0.9,\n              beta = 5,\n              use_weights = false,\n              random_state = nothing,\n              n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionPython"
+":name" = "CBLOFDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nCBLOFDetector(n_clusters = 8,\n              alpha = 0.9,\n              beta = 5,\n              use_weights = false,\n              random_state = nothing,\n              n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof)\n"""
-":name" = "CBLOFDetector"
-":human_name" = "cblof detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
-":hyperparameters" = "`(:n_clusters, :alpha, :beta, :use_weights, :random_state, :n_jobs)`"
-":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-":constructor" = "`nothing`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":is_wrapper" = "`false`"

 [OutlierDetectionPython.LOCIDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Real\", \"Real\")`"
+":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`false`"
-":package_name" = "OutlierDetectionPython"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionPython.LOCIDetector"
-":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameters" = "`(:alpha, :k)`"
+":is_pure_julia" = "`false`"
+":human_name" = "loci detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nLOCIDetector(alpha = 0.5,\n             k = 3)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionPython"
+":name" = "LOCIDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nLOCIDetector(alpha = 0.5,\n             k = 3)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci)\n"""
-":name" = "LOCIDetector"
-":human_name" = "loci detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
-":hyperparameters" = "`(:alpha, :k)`"
-":hyperparameter_types" = "`(\"Real\", \"Real\")`"
-":hyperparameter_ranges" = "`(nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-":constructor" = "`nothing`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":is_wrapper" = "`false`"

 [OutlierDetectionPython.LMDDDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Integer\", \"String\", \"Union{Nothing, Integer}\")`"
+":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`false`"
-":package_name" = "OutlierDetectionPython"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionPython.LMDDDetector"
-":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameters" = "`(:n_iter, :dis_measure, :random_state)`"
+":is_pure_julia" = "`false`"
+":human_name" = "lmdd detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nLMDDDetector(n_iter = 50,\n             dis_measure = \"aad\",\n             random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl"
-":is_wrapper" = "`false`"
-":supports_weights" = "`false`"
+":package_name" = "OutlierDetectionPython"
+":name" = "LMDDDetector"
+":target_in_fit" = "`false`"
 ":supports_class_weights" = "`false`"
 ":supports_online" = "`false`"
-":docstring" = """```\nLMDDDetector(n_iter = 50,\n             dis_measure = \"aad\",\n             random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd)\n"""
-":name" = "LMDDDetector"
-":human_name" = "lmdd detector"
-":is_supervised" = "`false`"
-":prediction_type" = ":unknown"
-":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
-":hyperparameters" = "`(:n_iter, :dis_measure, :random_state)`"
-":hyperparameter_types" = "`(\"Integer\", \"String\", \"Union{Nothing, Integer}\")`"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing)`"
-":iteration_parameter" = "`nothing`"
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
 ":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
 ":reports_feature_importances" = "`false`"
-":deep_properties" = "`()`"
-":reporting_operations" = "`()`"
-":constructor" = "`nothing`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":is_wrapper" = "`false`"

 [OutlierDetectionPython.RODDetector]
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
-":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Bool\",)`"
+":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameter_ranges" = "`(nothing,)`"
+":reporting_operations" = "`()`"
 ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`"
-":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":target_in_fit" = "`false`"
-":is_pure_julia" = "`false`"
-":package_name" = "OutlierDetectionPython"
+":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`"
 ":package_license" = "MIT"
+":prediction_type" = ":unknown"
 ":load_path" = "OutlierDetectionPython.RODDetector"
-":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e"
+":hyperparameters" = "`(:parallel_execution,)`"
+":is_pure_julia" = "`false`"
+":human_name" = "rod detector"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nRODDetector(parallel_execution = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
 ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl"
+":package_name" = "OutlierDetectionPython"
+":name" = "RODDetector"
+":target_in_fit" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`"
+":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
+":reports_feature_importances" = "`false`"
+":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`"
 ":is_wrapper" = "`false`"
+
+":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "SelfOrganizingMaps.SelfOrganizingMap" +":hyperparameters" = "`(:k, :η, :σ², :grid_type, :η_decay, :σ_decay, :neighbor_function, :matching_distance, :Nepochs)`" +":is_pure_julia" = "`true`" +":human_name" = "self organizing map" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSelfOrganizingMap\n```\n\nA model type for constructing a self organizing map, based on [SelfOrganizingMaps.jl](https://github.com/john-waczak/SelfOrganizingMaps.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSelfOrganizingMap = @load SelfOrganizingMap pkg=SelfOrganizingMaps\n```\n\nDo `model = SelfOrganizingMap()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SelfOrganizingMap(k=...)`.\n\nSelfOrganizingMaps implements [Kohonen's Self Organizing Map](https://ieeexplore.ieee.org/abstract/document/58325?casa_token=pGue0TD38nAAAAAA:kWFkvMJQKgYOTJjJx-_bRx8n_tnWEpau2QeoJ1gJt0IsywAuvkXYc0o5ezdc2mXfCzoEZUQXSQ), Proceedings of the IEEE; Kohonen, T.; (1990):\"The self-organizing map\"\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X) where\n\n * `X`: an `AbstractMatrix` or `Table` of input features whose columns are of scitype `Continuous.`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=10`: Number of nodes along once side of SOM grid. There are `k²` total nodes.\n * `η=0.5`: Learning rate. Scales adjust made to winning node and its neighbors during each round of training.\n * `σ²=0.05`: The (squared) neighbor radius. Used to determine scale for neighbor node adjustments.\n * `grid_type=:rectangular` Node grid geometry. One of `(:rectangular, :hexagonal, :spherical)`.\n * `η_decay=:exponential` Learning rate schedule function. One of `(:exponential, :asymptotic)`\n * `σ_decay=:exponential` Neighbor radius schedule function. One of `(:exponential, :asymptotic, :none)`\n * `neighbor_function=:gaussian` Kernel function used to make adjustment to neighbor weights. Scale is set by `σ²`. One of `(:gaussian, :mexican_hat)`.\n * `matching_distance=euclidean` Distance function from `Distances.jl` used to determine winning node.\n * `Nepochs=1` Number of times to repeat training on the shuffled dataset.\n\n# Operations\n\n * `transform(mach, Xnew)`: returns the coordinates of the winning SOM node for each instance of `Xnew`. For SOM of grid*type `:rectangular` and `:hexagonal`, these are cartesian coordinates. 
For `grid_type` `:spherical`, these are the latitude and longitude in radians.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coords`: The coordinates of each of the SOM nodes (points in the domain of the map) with shape (k², 2)\n * `weights`: Array of weight vectors for the SOM nodes (corresponding points in the map's range) of shape (k², input dimension)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes`: the index of the winning node for each instance of the training data `X`, interpreted as a class label\n\n# Examples\n\n```\nusing MLJ\nsom = @load SelfOrganizingMap pkg=SelfOrganizingMaps\nmodel = som()\nX, y = make_regression(50, 3) # synthetic data\nmach = machine(model, X) |> fit!\nX̃ = transform(mach, X)\n\nrpt = report(mach)\nclasses = rpt.classes\n```\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/john-waczak/SelfOrganizingMaps.jl" +":package_name" = "SelfOrganizingMaps" +":name" = "SelfOrganizingMap" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" ":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" +":is_wrapper" = "`false`" + +[InteractiveUtils] + +[MLJMultivariateStatsInterface.LDA] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Distances.SemiMetric\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.LDA" +":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :dist)`" +":is_pure_julia" = "`true`" +":human_name" = "linear discriminant analysis model" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLDA\n```\n\nA model type for constructing a linear discriminant analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLDA = @load LDA pkg=MultivariateStats\n```\n\nDo `model = LDA()` to construct an instance with default hyper-parameters.
Provide keyword arguments to override hyper-parameter defaults, as in `LDA(method=...)`.\n\n[Multiclass linear discriminant analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection in a space of features to a lower dimensional space, in a way that attempts to preserve as much as possible the degree to which the classes of a discrete target variable can be discriminated. This can be used either for dimension reduction of the features (see `transform` below) or for probabilistic classification of the target (see `predict` below).\n\nIn the case of prediction, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, are computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: The solver, one of `:gevd` or `:whiten`.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above.
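The softmax-over-negated-distances rule quoted in this docstring is easy to check by hand. A minimal sketch, not part of the registered metadata: `z` and `centroids` are made-up stand-ins for a projected observation and the fitted class centroids, and `SqEuclidean` is the default `dist`:

```julia
using Distances

z = randn(2)                # a new observation, in the transformed space
centroids = randn(2, 3)     # columns are the centroids of three hypothetical classes
dist = SqEuclidean()        # Distances.jl metrics are callable

# distance of `z` to each centroid, negated, then softmax:
d = [dist(z, centroids[:, j]) for j in 1:size(centroids, 2)]
probs = exp.(-d) ./ sum(exp.(-d))   # uncalibrated class probabilities; sums to one
```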
Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nLDA = @load LDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = LDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "LDA" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nRODDetector(parallel_execution = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod)\n""" -":name" = "RODDetector" -":human_name" = "rod detector" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] -":hyperparameters" = "`(:parallel_execution,)`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[MLJMultivariateStatsInterface.MultitargetLinearRegressor] +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Bool\",)`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing,)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.MultitargetLinearRegressor" +":hyperparameters" = "`(:bias,)`" +":is_pure_julia" = "`true`" +":human_name" = "multitarget linear regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultitargetLinearRegressor\n```\n\nA model type for constructing a multitarget linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetLinearRegressor(bias=...)`.\n\n`MultitargetLinearRegressor` assumes the target variable is vector-valued with continuous components. It trains a linear prediction function using the least squares algorithm. Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 9; n_targets = 2) # a table and a table (synthetic data)\n\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 9)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "MultitargetLinearRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" 
":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[MLJMultivariateStatsInterface.BayesianSubspaceLDA] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.BayesianSubspaceLDA" +":hyperparameters" = "`(:normalize, :outdim, :priors)`" +":is_pure_julia" = "`true`" +":human_name" = "Bayesian subspace LDA model" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nBayesianSubspaceLDA\n```\n\nA model type for constructing a Bayesian subspace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianSubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianSubspaceLDA(normalize=...)`.\n\nThe Bayesian multiclass subspace linear discriminant analysis algorithm learns a projection matrix as described in [`SubspaceLDA`](@ref). The posterior class probability distribution is derived as in [`BayesianLDA`](@ref).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n\n`outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. 
Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The overall mean of the training data.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance.
Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```\nusing MLJ\n\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianSubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "BayesianSubspaceLDA" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] ":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[MLJMultivariateStatsInterface.FactorAnalysis] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Int64\", \"Real\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJMultivariateStatsInterface.FactorAnalysis" +":hyperparameters" = "`(:method, :maxoutdim, :maxiter, :tol, :eta, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "factor analysis model" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nFactorAnalysis\n```\n\nA model type for constructing a factor analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n```\n\nDo `model = FactorAnalysis()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FactorAnalysis(method=...)`.\n\nFactor analysis is a linear-Gaussian latent variable model that is closely related to probabilistic PCA. 
In contrast to the probabilistic PCA model, the covariance of the conditional distribution of the observed variable given the latent variable is diagonal rather than isotropic.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:cm`: Method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `maxiter::Int=1000`: Maximum number of iterations.\n * `tol::Real=1e-6`: Convergence tolerance.\n * `eta::Real=tol`: Variance lower bound.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively. Each column of the projection matrix corresponds to a factor.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data (number of factors).\n * `variance`: The variance of the factors.\n * `covariance_matrix`: The estimated covariance matrix.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `loadings`: The factor loadings.
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = FactorAnalysis(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`PPCA`](@ref), [`PCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "FactorAnalysis" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" + +[MLJMultivariateStatsInterface.LinearRegressor] ":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\",)`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing,)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.LinearRegressor" +":hyperparameters" = "`(:bias,)`" +":is_pure_julia" = "`true`" +":human_name" = "linear regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(bias=...)`.\n\n`LinearRegressor` assumes the target is a `Continuous` variable and trains a linear prediction function using the least squares algorithm. 
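As a plain linear-algebra reminder of what `bias=true` means for the least squares fit just described, a self-contained sketch on synthetic data (no MLJ machinery; all names are illustrative):

```julia
n, p = 100, 2
X = randn(n, p)
y = X * [2.0, -1.0] .+ 0.5 .+ 0.1 .* randn(n)   # true intercept is 0.5

# `bias=true` amounts to appending a column of ones before solving:
A = hcat(X, ones(n))
θ = A \ y                                  # least-squares solution
coefficients, intercept = θ[1:p], θ[end]
```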
Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 2) # a table and a vector (synthetic data)\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "LinearRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" -[SelfOrganizingMaps.SelfOrganizingMap] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" +[MLJMultivariateStatsInterface.ICA] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Symbol\", \"Bool\", \"Int64\", \"Real\", \"Union{Nothing, Matrix{<:Real}}\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" 
+":load_path" = "MLJMultivariateStatsInterface.ICA" +":hyperparameters" = "`(:outdim, :alg, :fun, :do_whiten, :maxiter, :tol, :winit, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "independent component analysis model" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nICA\n```\n\nA model type for constructing a independent component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nICA = @load ICA pkg=MultivariateStats\n```\n\nDo `model = ICA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ICA(outdim=...)`.\n\nIndependent component analysis is a computational technique for separating a multivariate signal into additive subcomponents, with the assumption that the subcomponents are non-Gaussian and independent from each other.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `outdim::Int=0`: The number of independent components to recover, set automatically if `0`.\n * `alg::Symbol=:fastica`: The algorithm to use (only `:fastica` is supported at the moment).\n * `fun::Symbol=:tanh`: The approximate neg-entropy function, one of `:tanh`, `:gaus`.\n * `do_whiten::Bool=true`: Whether or not to perform pre-whitening.\n * `maxiter::Int=100`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance for change in the unmixing matrix W.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: mean to use, if nothing (default) centering is computed and applied, if zero, no centering; otherwise a vector of means can be passed.\n * `winit::Union{Nothing,Matrix{<:Real}}=nothing`: Initial guess for the unmixing matrix `W`: either an empty matrix (for random initialization of `W`), a matrix of size `m × k` (if `do_whiten` is true), or a matrix of size `m × k`. 
Here `m` is the number of columns of the input and `k` is the number of components to recover (`outdim`).\n\n# Operations\n\n * `transform(mach, Xnew)`: Return the component-separated version of input `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: The estimated component matrix.\n * `mean`: The estimated mean vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n\n# Examples\n\n```\nusing MLJ\n\nICA = @load ICA pkg=MultivariateStats\n\ntimes = range(0, 8, length=2000)\n\nsine_wave = sin.(2*times)\nsquare_wave = sign.(sin.(3*times))\nsawtooth_wave = map(t -> mod(2t, 2) - 1, times)\nsignals = hcat(sine_wave, square_wave, sawtooth_wave)\nnoisy_signals = signals + 0.2*randn(size(signals))\n\nmixing_matrix = [ 1 1 1; 0.5 2 1; 1.5 1 2]\nX = MLJ.table(noisy_signals*mixing_matrix)\n\nmodel = ICA(outdim = 3, tol=0.1)\nmach = machine(model, X) |> fit!\n\nX_unmixed = transform(mach, X)\n\nusing Plots\n\nplot(X.x1)\nplot(X.x2)\nplot(X.x3)\n\nplot(X_unmixed.x1)\nplot(X_unmixed.x2)\nplot(X_unmixed.x3)\n\n```\n\nSee also [`PCA`](@ref), [`KernelPCA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "ICA" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" + +[MLJMultivariateStatsInterface.PPCA] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJMultivariateStatsInterface.PPCA" +":hyperparameters" = "`(:maxoutdim, :method, :maxiter, :tol, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "probabilistic PCA model" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = 
"""```\nPPCA\n```\n\nA model type for constructing a probabilistic PCA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPPCA = @load PPCA pkg=MultivariateStats\n```\n\nDo `model = PPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PPCA(maxoutdim=...)`.\n\nProbabilistic principal component analysis is a dimension-reduction algorithm which represents a constrained form of the Gaussian distribution in which the number of free parameters can be restricted while still allowing the model to capture the dominant correlations in a data set. It is expressed as the maximum likelihood solution of a probabilistic latent variable model. For details, see C. M. Bishop (2006): Pattern Recognition and Machine Learning.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `method::Symbol=:ml`: The method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxiter::Int=1000`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively. Each column of the projection matrix corresponds to a principal component.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `tvat`: The variance of the components.\n * `loadings`: The model's loadings matrix.
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nPPCA = @load PPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PPCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "PPCA" ":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" + +[MLJMultivariateStatsInterface.RidgeRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.RidgeRegressor" +":hyperparameters" = "`(:lambda, :bias)`" ":is_pure_julia" = "`true`" -":package_name" = "SelfOrganizingMaps" +":human_name" = "ridge regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RidgeRegressor(lambda=...)`.\n\n`RidgeRegressor` adds a quadratic penalty term to least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity.
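The quadratic penalty mentioned here has a familiar closed form. A sketch assuming a scalar `lambda` and no bias term, in plain linear algebra and independent of the MLJ interface:

```julia
using LinearAlgebra   # for I

n, p = 100, 6
X = randn(n, p)
y = X * randn(p) + 0.1 * randn(n)

lambda = 1.0
# Minimizer of ‖y - Xθ‖² + lambda*‖θ‖²; lambda = 0 recovers ordinary least
# squares, and growing lambda shrinks all coefficients toward zero:
θ = (X'X + lambda * I) \ (X'y)
```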
Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: The non-negative parameter for the regularization strength. If `lambda` is 0, ridge regression is equivalent to linear least squares regression, and as `lambda` approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\npipe = Standardizer() |> RidgeRegressor(lambda=10)\n\nX, y = @load_boston\n\nmach = machine(pipe, X, y) |> fit!\nyhat = predict(mach, X)\ntraining_error = l1(yhat, y) |> mean\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "RidgeRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[MLJMultivariateStatsInterface.KernelPCA] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Function}\", \"Symbol\", \"Bool\", \"Real\", \"Real\", \"Int64\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":load_path" = "SelfOrganizingMaps.SelfOrganizingMap" -":package_uuid" = "ba4b7379-301a-4be0-bee6-171e4e152787" -":package_url" = "https://github.com/john-waczak/SelfOrganizingMaps.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nSelfOrganizingMap\n```\n\nA
model type for constructing a self organizing map, based on [SelfOrganizingMaps.jl](https://github.com/john-waczak/SelfOrganizingMaps.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSelfOrganizingMap = @load SelfOrganizingMap pkg=SelfOrganizingMaps\n```\n\nDo `model = SelfOrganizingMap()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SelfOrganizingMap(k=...)`.\n\nSelfOrganizingMaps implements [Kohonen's Self Organizing Map](https://ieeexplore.ieee.org/abstract/document/58325?casa_token=pGue0TD38nAAAAAA:kWFkvMJQKgYOTJjJx-_bRx8n_tnWEpau2QeoJ1gJt0IsywAuvkXYc0o5ezdc2mXfCzoEZUQXSQ), Proceedings of the IEEE; Kohonen, T.; (1990):\"The self-organizing map\"\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X) where\n\n * `X`: an `AbstractMatrix` or `Table` of input features whose columns are of scitype `Continuous.`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=10`: Number of nodes along once side of SOM grid. There are `k²` total nodes.\n * `η=0.5`: Learning rate. Scales adjust made to winning node and its neighbors during each round of training.\n * `σ²=0.05`: The (squared) neighbor radius. Used to determine scale for neighbor node adjustments.\n * `grid_type=:rectangular` Node grid geometry. One of `(:rectangular, :hexagonal, :spherical)`.\n * `η_decay=:exponential` Learning rate schedule function. One of `(:exponential, :asymptotic)`\n * `σ_decay=:exponential` Neighbor radius schedule function. One of `(:exponential, :asymptotic, :none)`\n * `neighbor_function=:gaussian` Kernel function used to make adjustment to neighbor weights. Scale is set by `σ²`. One of `(:gaussian, :mexican_hat)`.\n * `matching_distance=euclidean` Distance function from `Distances.jl` used to determine winning node.\n * `Nepochs=1` Number of times to repeat training on the shuffled dataset.\n\n# Operations\n\n * `transform(mach, Xnew)`: returns the coordinates of the winning SOM node for each instance of `Xnew`. For SOM of grid*type `:rectangular` and `:hexagonal`, these are cartesian coordinates. 
For grid*type `:spherical`, these are the latitude and longitude in radians.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coords`: The coordinates of each of the SOM nodes (points in the domain of the map) with shape (k², 2)\n * `weights`: Array of weight vectors for the SOM nodes (corresponding points in the map's range) of shape (k², input dimension)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes`: the index of the winning node for each instance of the training data X interpreted as a class label\n\n# Examples\n\n```\nusing MLJ\nsom = @load SelfOrganizingMap pkg=SelfOrganizingMaps\nmodel = som()\nX, y = make_regression(50, 3) # synthetic data\nmach = machine(model, X) |> fit!\nX̃ = transform(mach, X)\n\nrpt = report(mach)\nclasses = rpt.classes\n```\n""" -":name" = "SelfOrganizingMap" -":human_name" = "self organizing map" -":is_supervised" = "`false`" ":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:k, :η, :σ², :grid_type, :η_decay, :σ_decay, :neighbor_function, :matching_distance, :Nepochs)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Symbol\", \"Symbol\", \"Symbol\", \"Symbol\", \"Distances.PreMetric\", \"Int64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":load_path" = "MLJMultivariateStatsInterface.KernelPCA" +":hyperparameters" = "`(:maxoutdim, :kernel, :solver, :inverse, :beta, :tol, :maxiter)`" +":is_pure_julia" = "`true`" +":human_name" = "kernel principal component analysis model" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nKernelPCA\n```\n\nA model type for constructing a kernel principal component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n```\n\nDo `model = KernelPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KernelPCA(maxoutdim=...)`.\n\nIn kernel PCA the linear operations of ordinary principal component analysis are performed in a [reproducing kernel Hilbert space](https://en.wikipedia.org/wiki/Reproducing_kernel_Hilbert_space).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `kernel::Function=(x,y)->x'y`: The kernel function, which takes in two vector arguments `x` and `y` and returns a scalar value.
Defaults to the dot product of `x` and `y`.\n * `solver::Symbol=:eig`: solver to use for the eigenvalues, one of `:eig` (default, uses `LinearAlgebra.eigen`) or `:eigs` (uses `Arpack.eigs`).\n * `inverse::Bool=true`: whether to perform the calculations needed for the inverse transform\n * `beta::Real=1.0`: strength of the ridge regression that learns the inverse transform when `inverse` is true.\n * `tol::Real=0.0`: Convergence tolerance for eigenvalue solver.\n * `maxiter::Int=300`: maximum number of iterations for eigenvalue solver.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `principalvars`: The variance of the principal components.\n\n# Examples\n\n```\nusing MLJ\nusing LinearAlgebra\n\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nfunction rbf_kernel(length_scale)\n return (x,y) -> exp(-norm(x-y)^2 / (2 * length_scale^2))\nend\n\nmodel = KernelPCA(maxoutdim=2, kernel=rbf_kernel(1))\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`PCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "KernelPCA" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[LIBSVM.SVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" + +[MLJMultivariateStatsInterface.MultitargetRidgeRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.MultitargetRidgeRegressor" +":hyperparameters" = "`(:lambda, :bias)`" +":is_pure_julia" = "`true`" +":human_name" = "multitarget ridge regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultitargetRidgeRegressor\n```\n\nA model type for constructing a multitarget ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetRidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetRidgeRegressor(lambda=...)`.\n\nMulti-target ridge regression adds a quadratic penalty term to multi-target least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. In this case, the output represents a response vector. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: The non-negative parameter for the regularization strength.
If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n\nX, y = make_regression(100, 6; n_targets = 2) # a table and a table (synthetic data)\n\nridge_regressor = RidgeRegressor(lambda=1.5)\nmach = machine(ridge_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 6)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "MultitargetRidgeRegressor" ":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "LIBSVM" -":package_license" = "unknown" -":load_path" = "MLJLIBSVMInterface.SVC" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`true`" +":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nSVC\n```\n\nA model type for constructing a C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSVC = @load SVC pkg=LIBSVM\n```\n\nDo `model = SVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SVC(kernel=...)`.\n\nThis model predicts actual class labels. To predict probabilities, use instead [`ProbabilisticSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. 
Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nSVC = @load SVC pkg=LIBSVM # model type\nmodel = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = SVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the classifiers [`ProbabilisticSVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref). 
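The multi-target ridge problem documented in the `MLJMultivariateStatsInterface.MultitargetRidgeRegressor` entry above has a simple closed form, which may help when checking the `lambda` semantics. The following is an illustrative sketch only, not part of the registry or of the MLJ interface; the helper name is hypothetical and the bias term is omitted for brevity.

```julia
using LinearAlgebra

# Sketch: penalized least squares, minimising |X*W - Y|^2 + lambda*|W|^2,
# with solution W = (X'X + lambda*I) \ (X'Y); columns of Y are the targets.
# Hypothetical standalone helper, bias handling omitted.
function ridge_coefficients(X::AbstractMatrix, Y::AbstractMatrix, lambda::Real)
    return (X'X + lambda * I) \ (X'Y)
end
```

With `lambda = 0` this reduces to ordinary multi-target least squares, matching the limiting behaviour described in that docstring.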
And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":name" = "SVC" -":human_name" = "C-support vector classifier" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[LIBSVM.EpsilonSVR] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" + +[MLJMultivariateStatsInterface.SubspaceLDA] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Distances.SemiMetric\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.SubspaceLDA" +":hyperparameters" = "`(:normalize, :outdim, :dist)`" +":is_pure_julia" = "`true`" +":human_name" = "subspace LDA model" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSubspaceLDA\n```\n\nA model type for constructing a subspace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = SubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SubspaceLDA(normalize=...)`.\n\nMulticlass subspace linear discriminant analysis (LDA) is a variation on ordinary [`LDA`](@ref) suitable for high dimensional data, as it avoids storing scatter matrices. 
For details, refer to the [MultivariateStats.jl documentation](https://juliastats.org/MultivariateStats.jl/stable/).\n\nIn addition to dimension reduction (using `transform`), probabilistic classification is provided (using `predict`). In the case of classification, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation from the centroid of each target class are computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n * `outdim`: the output dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space, i.e., the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. 
A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```\nusing MLJ\n\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = SubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "SubspaceLDA" ":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "LIBSVM" -":package_license" = "unknown" -":load_path" = "MLJLIBSVMInterface.EpsilonSVR" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nEpsilonSVR\n```\n\nA model type for constructing a ϵ-support vector regressor, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nEpsilonSVR = @load EpsilonSVR pkg=LIBSVM\n```\n\nDo `model = EpsilonSVR()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EpsilonSVR(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an adaptation of the classifier `SVC` to regression, but has an additional parameter `epsilon` (denoted $ϵ$ in the cited reference).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. 
See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `epsilon=0.1` (range (0, `Inf`)): the parameter denoted $ϵ$ in the cited reference; `epsilon` is the thickness of the penalty-free neighborhood of the graph of the prediction function (\"slab\" or \"tube\"). Specifically, a data point `(x, y)` incurs no training loss unless it is outside this neighborhood; the further away it is from the this neighborhood, the greater the loss penalty.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nEpsilonSVR = @load EpsilonSVR pkg=LIBSVM # model type\nmodel = EpsilonSVR(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = make_regression(rng=123) # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, rng=123)\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 0.2512132502584155\n 0.007340201523624579\n -0.2482949812264707\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = EpsilonSVR(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 1.1121225361666656\n 0.04667702229741916\n -0.6958148424680672\n```\n\nSee also [`NuSVR`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":name" = "EpsilonSVR" -":human_name" = "ϵ-support vector regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :gamma, :epsilon, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" 
+":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[LIBSVM.LinearSVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "LIBSVM" -":package_license" = "unknown" -":load_path" = "MLJLIBSVMInterface.LinearSVC" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" ":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`true`" -":supports_online" = "`false`" -":docstring" = """```\nLinearSVC\n```\n\nA model type for constructing a linear support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearSVC = @load LinearSVC pkg=LIBSVM\n```\n\nDo `model = LinearSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearSVC(solver=...)`.\n\nReference for algorithm and core C-library: Rong-En Fan et al (2008): \"LIBLINEAR: A Library for Large Linear Classification.\" *Journal of Machine Learning Research* 9 1871-1874. Available at [https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf). 
\n\nThis model type is similar to `SVC` from the same package with the setting `kernel=LIBSVM.Kernel.KERNEL.Linear`, but is optimized for the linear case.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `solver=LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: linear solver, which must be one of the following from the LIBSVM.jl package:\n\n * `LIBSVM.Linearsolver.L2R_LR`: L2-regularized logistic regression (primal))\n * `LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: L2-regularized L2-loss support vector classification (dual)\n * `LIBSVM.Linearsolver.L2R_L2LOSS_SVC`: L2-regularized L2-loss support vector classification (primal)\n * `LIBSVM.Linearsolver.L2R_L1LOSS_SVC_DUAL`: L2-regularized L1-loss support vector classification (dual)\n * `LIBSVM.Linearsolver.MCSVM_CS`: support vector classification by Crammer and Singer) `LIBSVM.Linearsolver.L1R_L2LOSS_SVC`: L1-regularized L2-loss support vector classification)\n * `LIBSVM.Linearsolver.L1R_LR`: L1-regularized logistic regression\n * `LIBSVM.Linearsolver.L2R_LR_DUAL`: L2-regularized logistic regression (dual)\n * `tolerance::Float64=Inf`: tolerance for the stopping criterion;\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `bias= -1.0`: if `bias >= 0`, instance `x` becomes `[x; bias]`; if `bias < 0`, no bias term added (default -1)\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Examples\n\n```\nusing MLJ\nimport LIBSVM\n\nLinearSVC = @load LinearSVC pkg=LIBSVM # model type\nmodel = LinearSVC(solver=LIBSVM.Linearsolver.L2R_LR) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"versicolor\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the [`SVC`](@ref) and [`NuSVC`](@ref) classifiers, and [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/liblinear/blob/master/README).\n""" -":name" = "LinearSVC" -":human_name" = "linear support vector classifier" + 
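The `SubspaceLDA` entry added above describes classification as a softmax applied to minus the centroid distances in the projected space. The following is a minimal sketch of that rule only, not part of the registry or of MLJ; the helper name is hypothetical, `z` is assumed to be one projected observation and `centroids` to hold one column per class.

```julia
using Distances  # SqEuclidean is the documented default for `dist`

# Softmax of minus the class-centroid distances, as described above.
function class_probabilities(z::AbstractVector, centroids::AbstractMatrix,
                             metric::SemiMetric=SqEuclidean())
    d = [evaluate(metric, z, c) for c in eachcol(centroids)]
    e = exp.(-d .- maximum(-d))  # numerically stabilized softmax(-d)
    return e ./ sum(e)
end
```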
+[MLJMultivariateStatsInterface.BayesianLDA] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.BayesianLDA" +":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :priors)`" +":is_pure_julia" = "`true`" +":human_name" = "Bayesian LDA model" ":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:solver, :tolerance, :cost, :bias)`" -":hyperparameter_types" = "`(\"LIBSVM.Linearsolver.LINEARSOLVER\", \"Float64\", \"Float64\", \"Float64\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nBayesianLDA\n```\n\nA model type for constructing a Bayesian LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianLDA(method=...)`.\n\nThe Bayesian multiclass LDA algorithm learns a projection matrix as described in ordinary [`LDA`](@ref). Predicted class posterior probability distributions are derived by applying Bayes' rule with a multivariate Gaussian class-conditional distribution. A prior class distribution can be specified by the user or inferred from training data class frequency.\n\nSee also the [package documentation](https://multivariatestatsjl.readthedocs.io/en/latest/lda.html). For more information about the algorithm, see [Li, Zhu and Ogihara (2006): Using Discriminant Analysis for Multi-class Classification: An Experimental Investigation](https://doi.org/10.1007/s10115-006-0013-y).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: choice of solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). 
Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e., dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "BayesianLDA" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[LIBSVM.ProbabilisticSVC] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "LIBSVM" -":package_license" = "unknown" -":load_path" = "MLJLIBSVMInterface.ProbabilisticSVC" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" ":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`true`" + +[MLJMultivariateStatsInterface.PCA] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Float64\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJMultivariateStatsInterface.PCA" +":hyperparameters" = "`(:maxoutdim, :method, :variance_ratio, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "pca" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nPCA\n```\n\nA model type for constructing a pca, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), 
and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPCA = @load PCA pkg=MultivariateStats\n```\n\nDo `model = PCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PCA(maxoutdim=...)`.\n\nPrincipal component analysis learns a linear projection onto a lower dimensional space while preserving most of the initial variance seen in the training data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Together with `variance_ratio`, controls the output dimension `outdim` chosen by the model. Specifically, suppose that `k` is the smallest integer such that retaining the `k` most significant principal components accounts for `variance_ratio` of the total variance in the training data. Then `outdim = min(k, maxoutdim)`. If `maxoutdim=0` (default) then the effective `maxoutdim` is `min(n, indim - 1)` where `n` is the number of observations and `indim` the number of features in the training data.\n * `variance_ratio::Float64=0.99`: The ratio of variance preserved after the transformation.\n * `method=:auto`: The method to use to solve the problem. Choices are\n\n * `:svd`: Singular Value Decomposition of the matrix.\n * `:cov`: Covariance matrix decomposition.\n * `:auto`: Use `:cov` if the matrix's first dimension is smaller than its second dimension and otherwise use `:svd`.\n * `mean=nothing`: if `nothing`, centering will be computed and applied; if set to `0`, no centering is applied (data is assumed pre-centered); if a vector is passed, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim = min(n, indim, maxoutdim)` is the output dimension; here `n` is the number of observations.\n * `tprincipalvar`: Total variance of the principal components.\n * `tresidualvar`: Total residual variance.\n * `tvar`: Total observation variance (principal + residual variance).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `principalvars`: The variance of the principal components. An AbstractVector of length `outdim`.\n * `loadings`: The model's loadings, weights for each variable used when calculating principal components. 
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nPCA = @load PCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "PCA" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nProbabilisticSVC\n```\n\nA model type for constructing a probabilistic C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nProbabilisticSVC = @load ProbabilisticSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticSVC(kernel=...)`.\n\nThis model is identical to [`SVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to the total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. 
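Because the interaction between `maxoutdim` and `variance_ratio` in the `PCA` entry above is easy to misread, here is a small usage sketch. It relies only on operations and report fields that the docstring itself documents; the hyper-parameter values are arbitrary illustrations.

```julia
using MLJ

PCA = @load PCA pkg=MultivariateStats

X, _ = @load_iris # a table (the target is not used)

# Keep the smallest number k of components explaining 90% of the variance,
# capped at 3 components; report(mach).outdim is then min(k, 3).
model = PCA(variance_ratio=0.9, maxoutdim=3)
mach = machine(model, X) |> fit!

r = report(mach)
r.indim, r.outdim, r.principalvars
```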
See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return probabilistic predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nProbabilisticSVC = @load ProbabilisticSVC pkg=LIBSVM # model type\nmodel = ProbabilisticSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00186, versicolor=>0.003, virginica=>0.995)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000563, versicolor=>0.0554, virginica=>0.944)\n UnivariateFinite{Multiclass{3}}(setosa=>1.4e-6, versicolor=>1.68e-6, virginica=>1.0)\n\n\njulia> labels = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`SVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref), and [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":name" = "ProbabilisticSVC" -":human_name" = "probabilistic C-support vector classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":deep_properties" = "`()`" 
+":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" + +[MLJLIBSVMInterface.ProbabilisticNuSVC] +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" ":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[LIBSVM.NuSVR] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "LIBSVM" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "unknown" -":load_path" = "MLJLIBSVMInterface.NuSVR" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":prediction_type" = ":probabilistic" +":load_path" = "MLJLIBSVMInterface.ProbabilisticNuSVC" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic ν-support vector classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nProbabilisticNuSVC\n```\n\nA model type for constructing a probabilistic ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticNuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticNuSVC(kernel=...)`.\n\nThis model is identical to [`NuSVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue 91](https://github.com/JuliaML/LIBSVM.jl/issues/91).\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. 
Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return probabilistic predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM # model type\nmodel = ProbabilisticNuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00313, versicolor=>0.0247, virginica=>0.972)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000598, versicolor=>0.0155, virginica=>0.984)\n UnivariateFinite{Multiclass{3}}(setosa=>2.27e-6, versicolor=>2.73e-6, virginica=>1.0)\n\njulia> yhat = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticNuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`NuSVC`](@ref), [`SVC`](@ref), [`ProbabilisticSVC`](@ref) and [`LinearSVC`](@ref). And see [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "ProbabilisticNuSVC" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nNuSVR\n```\n\nA model type for constructing a ν-support vector regressor, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nNuSVR = @load NuSVR pkg=LIBSVM\n```\n\nDo `model = NuSVR()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NuSVR(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
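As the `ProbabilisticNuSVC` docstring above illustrates, the probabilistic LIBSVM classifiers return a vector of `UnivariateFinite` distributions rather than labels. A short sketch of how such predictions are typically consumed, using only standard MLJ operations (the choice of data and default hyper-parameters is an arbitrary illustration):

```julia
using MLJ
import LIBSVM

ProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM

X, y = @load_iris
mach = machine(ProbabilisticNuSVC(), X, y) |> fit!

probs = predict(mach, X)  # vector of UnivariateFinite distributions
pdf.(probs, "virginica")  # probability of one class, per observation
mode.(probs)              # most probable class labels
```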
\n\nThis model is a re-parameterization of `EpsilonSVR` in which the `epsilon` hyper-parameter is replaced with a new parameter `nu` (denoted $ν$ in the cited reference) which attempts to control the number of support vectors directly.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be\n\n called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Denoted $ν$ in the cited paper. 
Changing `nu` changes the thickness of some neighborhood of the graph of the prediction function (\"tube\" or \"slab\") and a training error is said to occur when a data point `(x, y)` lies outside of that neighborhood.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nNuSVR = @load NuSVR pkg=LIBSVM # model type\nmodel = NuSVR(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = make_regression(rng=123) # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, rng=123)\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 0.2008156459920009\n 0.1131520519131709\n -0.2076156254934889\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = NuSVR(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 1.1211558175964662\n 0.06677125944808422\n -0.6817578942749346\n```\n\nSee also [`EpsilonSVR`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":name" = "NuSVR" -":human_name" = "ν-support vector regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :gamma, :nu, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[LIBSVM.NuSVC] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[MLJLIBSVMInterface.EpsilonSVR] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "LIBSVM" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "unknown" -":load_path" = "MLJLIBSVMInterface.NuSVC" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.EpsilonSVR" +":hyperparameters" = "`(:kernel, :gamma, :epsilon, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "ϵ-support vector regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nEpsilonSVR\n```\n\nA model type for constructing a ϵ-support vector regressor, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nEpsilonSVR = @load EpsilonSVR pkg=LIBSVM\n```\n\nDo `model = EpsilonSVR()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EpsilonSVR(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an adaptation of the classifier `SVC` to regression, but has an additional parameter `epsilon` (denoted $ϵ$ in the cited reference).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. 
See [LIBSVM.jl issue #91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `epsilon=0.1` (range (0, `Inf`)): the parameter denoted $ϵ$ in the cited reference; `epsilon` is the thickness of the penalty-free neighborhood of the graph of the prediction function (\"slab\" or \"tube\"). Specifically, a data point `(x, y)` incurs no training loss unless it is outside this neighborhood; the further away it is from this neighborhood, the greater the loss penalty.\n * `cachesize=200.0`: cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nEpsilonSVR = @load EpsilonSVR pkg=LIBSVM # model type\nmodel = EpsilonSVR(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = make_regression(rng=123) # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, rng=123)\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 0.2512132502584155\n 0.007340201523624579\n -0.2482949812264707\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = EpsilonSVR(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 1.1121225361666656\n 0.04667702229741916\n -0.6958148424680672\n```\n\nSee also [`NuSVR`](@ref), [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "LIBSVM" +":name" = "EpsilonSVR" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nNuSVC\n```\n\nA model type for constructing a ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nNuSVC = @load NuSVC pkg=LIBSVM\n```\n\nDo `model = NuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NuSVC(kernel=...)`.\n\nThis model is a re-parameterization of the [`SVC`](@ref) classifier, where `nu` replaces `cost`, and is mathematically equivalent to it. 
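The `epsilon` bullet in the `EpsilonSVR` docstring above describes a penalty-free "tube" around the graph of the prediction function. A minimal sketch of the standard ϵ-insensitive loss this refers to, assuming the convention in the cited LIBSVM reference:

```julia
# ϵ-insensitive loss: zero inside the tube of half-width ϵ around the
# prediction, linear penalty outside it.
eps_insensitive_loss(y, ŷ, ϵ) = max(0.0, abs(y - ŷ) - ϵ)

eps_insensitive_loss(1.0, 1.05, 0.1) # 0.0   (inside the tube: no training loss)
eps_insensitive_loss(1.0, 1.3, 0.1)  # ≈ 0.2 (outside the tube: linear penalty)
```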
The parameter `nu` allows more direct control over the number of support vectors (see under \"Hyper-parameters\").\n\nThis model always predicts actual class labels. For probabilistic predictions, use instead [`ProbabilisticNuSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. 
Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nNuSVC = @load NuSVC pkg=LIBSVM # model type\nmodel = NuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = NuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\nSee also the classifiers [`SVC`](@ref) and [`LinearSVC`](@ref), [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. 
[documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":name" = "NuSVC" -":human_name" = "ν-support vector classifier" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[LIBSVM.ProbabilisticNuSVC] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +[MLJLIBSVMInterface.LinearSVC] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"LIBSVM.Linearsolver.LINEARSOLVER\", \"Float64\", \"Float64\", \"Float64\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "LIBSVM" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "unknown" -":load_path" = "MLJLIBSVMInterface.ProbabilisticNuSVC" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.LinearSVC" +":hyperparameters" = "`(:solver, :tolerance, :cost, :bias)`" +":is_pure_julia" = "`false`" +":human_name" = "linear support vector classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLinearSVC\n```\n\nA model type for constructing a linear support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearSVC = @load LinearSVC pkg=LIBSVM\n```\n\nDo `model = LinearSVC()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `LinearSVC(solver=...)`.\n\nReference for algorithm and core C-library: Rong-En Fan et al (2008): \"LIBLINEAR: A Library for Large Linear Classification.\" *Journal of Machine Learning Research* 9 1871-1874. Available at [https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/liblinear.pdf). \n\nThis model type is similar to `SVC` from the same package with the setting `kernel=LIBSVM.Kernel.Linear`, but is optimized for the linear case.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `solver=LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: linear solver, which must be one of the following from the LIBSVM.jl package:\n\n * `LIBSVM.Linearsolver.L2R_LR`: L2-regularized logistic regression (primal)\n * `LIBSVM.Linearsolver.L2R_L2LOSS_SVC_DUAL`: L2-regularized L2-loss support vector classification (dual)\n * `LIBSVM.Linearsolver.L2R_L2LOSS_SVC`: L2-regularized L2-loss support vector classification (primal)\n * `LIBSVM.Linearsolver.L2R_L1LOSS_SVC_DUAL`: L2-regularized L1-loss support vector classification (dual)\n * `LIBSVM.Linearsolver.MCSVM_CS`: support vector classification by Crammer and Singer\n * `LIBSVM.Linearsolver.L1R_L2LOSS_SVC`: L1-regularized L2-loss support vector classification\n * `LIBSVM.Linearsolver.L1R_LR`: L1-regularized logistic regression\n * `LIBSVM.Linearsolver.L2R_LR_DUAL`: L2-regularized logistic regression (dual)\n * `tolerance::Float64=Inf`: tolerance for the stopping criterion\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `bias= -1.0`: if `bias >= 0`, instance `x` becomes `[x; bias]`; if `bias < 0`, no bias term is added (default -1)\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Examples\n\n```\nusing MLJ\nimport LIBSVM\n\nLinearSVC = @load LinearSVC pkg=LIBSVM # model type\nmodel = LinearSVC(solver=LIBSVM.Linearsolver.L2R_LR) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"versicolor\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element 
CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the [`SVC`](@ref) and [`NuSVC`](@ref) classifiers, and [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/liblinear/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/mpastell/LIBSVM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" +":package_name" = "LIBSVM" +":name" = "LinearSVC" +":target_in_fit" = "`true`" +":supports_class_weights" = "`true`" ":supports_online" = "`false`" -":docstring" = """```\nProbabilisticNuSVC\n```\n\nA model type for constructing a probabilistic ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticNuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticNuSVC(kernel=...)`.\n\nThis model is identical to [`NuSVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). 
If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM # model type\nmodel = ProbabilisticNuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00313, versicolor=>0.0247, virginica=>0.972)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000598, versicolor=>0.0155, virginica=>0.984)\n UnivariateFinite{Multiclass{3}}(setosa=>2.27e-6, versicolor=>2.73e-6, virginica=>1.0)\n\njulia> yhat = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticNuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`NuSVC`](@ref), [`SVC`](@ref), [`ProbabilisticSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. 
[documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" -":name" = "ProbabilisticNuSVC" -":human_name" = "probabilistic ν-support vector classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" -":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[LIBSVM.OneClassSVM] -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`false`" -":package_name" = "LIBSVM" -":package_license" = "unknown" -":load_path" = "MLJLIBSVMInterface.OneClassSVM" -":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" -":package_url" = "https://github.com/mpastell/LIBSVM.jl" +[MLJLIBSVMInterface.ProbabilisticSVC] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nOneClassSVM\n```\n\nA model type for constructing a one-class support vector machine, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneClassSVM = @load OneClassSVM pkg=LIBSVM\n```\n\nDo `model = OneClassSVM()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneClassSVM(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an outlier detection model delivering raw scores based on the decision function of a support vector machine. 
Like the [`NuSVC`](@ref) classifier, it uses the `nu` re-parameterization of the `cost` parameter appearing in standard support vector classification [`SVC`](@ref).\n\nTo extract normalized scores (\"probabilities\") wrap the model using `ProbabilisticDetector` from [OutlierDetection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For threshold-based classification, wrap the probabilistic model using MLJ's `BinaryThresholdPredictor`. Examples of wrapping appear below.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `transform(mach, Xnew)`: return scores for outlierness, given features `Xnew` having the same scitype as `X` above. The greater the score, the more likely it is an outlier. This score is based on the SVM decision function. For normalized scores, wrap `model` using `ProbabilisticDetector` from OutlierDetection.jl and call `predict` instead, and for threshold-based classification, wrap again using `BinaryThresholdPredictor`. See the examples below.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `orientation`: this equals `1` if the decision function for `libsvm_model` is increasing with increasing outlierness, and `-1` if it is decreasing instead. Correspondingly, the `libsvm_model` attaches `true` to outliers in the first case, and `false` in the second. 
(The `scores` given in the MLJ report and generated by `MLJ.transform` already correct for this ambiguity, which is therefore only an issue for users directly accessing `libsvm_model`.)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Generating raw scores for outlierness\n\n```\nusing MLJ\nimport LIBSVM\nimport StableRNGs.StableRNG\n\nOneClassSVM = @load OneClassSVM pkg=LIBSVM # model type\nmodel = OneClassSVM(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nrng = StableRNG(123)\nXmatrix = randn(rng, 5, 3)\nXmatrix[1, 1] = 100.0\nX = MLJ.table(Xmatrix)\n\nmach = machine(model, X) |> fit!\n\n# training scores (outliers have larger scores):\njulia> report(mach).scores\n5-element Vector{Float64}:\n 6.711689156091755e-7\n -6.740101976655081e-7\n -6.711632439648446e-7\n -6.743015858874887e-7\n -6.745393717880104e-7\n\n# scores for new data:\nXnew = MLJ.table(rand(rng, 2, 3))\n\njulia> transform(mach, rand(rng, 2, 3))\n2-element Vector{Float64}:\n -6.746293022511047e-7\n -6.744289265348623e-7\n```\n\n## Generating probabilistic predictions of outlierness\n\nContinuing the previous example:\n\n```\nusing OutlierDetection\npmodel = ProbabilisticDetector(model)\npmach = machine(pmodel, X) |> fit!\n\n# probabilistic predictions on new data:\n\njulia> y_prob = predict(pmach, Xnew)\n2-element UnivariateFiniteVector{OrderedFactor{2}, String, UInt8, Float64}:\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>9.57e-5)\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>0.0)\n\n# probabilities for outlierness:\n\njulia> pdf.(y_prob, \"outlier\")\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n\n# raw scores are still available using `transform`:\n\njulia> transform(pmach, Xnew)\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n```\n\n## Outlier classification using a probability threshold:\n\nContinuing the previous example:\n\n```\ndmodel = BinaryThresholdPredictor(pmodel, threshold=0.9)\ndmach = machine(dmodel, X) |> fit!\n\njulia> yhat = predict(dmach, Xnew)\n2-element CategoricalArrays.CategoricalArray{String,1,UInt8}:\n \"normal\"\n \"normal\"\n```\n\n## User-defined kernels\n\nContinuing the first example:\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = OneClassSVM(kernel=k)\nmach = machine(model, X) |> fit!\n\njulia> yhat = transform(mach, Xnew)\n2-element Vector{Float64}:\n -0.4825363352732942\n -0.4848772169720227\n```\n\nSee also [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README). 
For an alternative source of outlier detection models with an MLJ interface, see [OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/).\n""" -":name" = "OneClassSVM" -":human_name" = "one-class support vector machine" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "unknown" +":prediction_type" = ":probabilistic" +":load_path" = "MLJLIBSVMInterface.ProbabilisticSVC" +":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic C-support vector classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" +":docstring" = """```\nProbabilisticSVC\n```\n\nA model type for constructing a probabilistic C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nProbabilisticSVC = @load ProbabilisticSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticSVC(kernel=...)`.\n\nThis model is identical to [`SVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to the total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
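The `ProbabilisticSVC` docstring above notes that its probabilities come from Platt scaling of the SVM decision values, per the Platt reference cited next. A hedged sketch of the idea, with invented sigmoid coefficients (in practice `A` and `B` are fitted to held-out decision values by the cited method):

```julia
# Platt scaling maps a raw SVM decision value f to a probability via a
# fitted sigmoid; A and B here are made-up values for illustration only.
platt(f; A=-1.5, B=0.1) = 1 / (1 + exp(A*f + B))

platt(2.0)  # ≈ 0.95, confidently the positive class
platt(-2.0) # ≈ 0.04, confidently the negative class
```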
\n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue #91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. 
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0`: cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return probabilistic predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nProbabilisticSVC = @load ProbabilisticSVC pkg=LIBSVM # model type\nmodel = ProbabilisticSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00186, versicolor=>0.003, virginica=>0.995)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000563, versicolor=>0.0554, virginica=>0.944)\n UnivariateFinite{Multiclass{3}}(setosa=>1.4e-6, versicolor=>1.68e-6, virginica=>1.0)\n\n\njulia> labels = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`SVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref), and [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "ProbabilisticSVC" +":target_in_fit" = "`true`" +":supports_class_weights" = "`true`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = 
"`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[TSVD.TSVDTransformer] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":target_in_fit" = "`false`" -":is_pure_julia" = "`true`" -":package_name" = "TSVD" -":package_license" = "MIT" -":load_path" = "MLJTSVDInterface.TSVDTransformer" -":package_uuid" = "9449cd9e-2762-5aa3-a617-5413e99d722e" -":package_url" = "https://github.com/JuliaLinearAlgebra/TSVD.jl" +[MLJLIBSVMInterface.NuSVR] ":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.NuSVR" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "ν-support vector regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nNuSVR\n```\n\nA model type for constructing a ν-support vector regressor, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nNuSVR = @load NuSVR pkg=LIBSVM\n```\n\nDo `model = NuSVR()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NuSVR(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\nThis model is a re-parameterization of `EpsilonSVR` in which the `epsilon` hyper-parameter is replaced with a new parameter `nu` (denoted $ν$ in the cited reference) which attempts to control the number of support vectors directly.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue #91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of training errors and a lower bound of the fraction of support vectors. Denoted $ν$ in the cited paper. 
Changing `nu` changes the thickness of some neighborhood of the graph of the prediction function (\"tube\" or \"slab\") and a training error is said to occur when a data point `(x, y)` lies outside of that neighborhood.\n * `cachesize=200.0`: cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nNuSVR = @load NuSVR pkg=LIBSVM # model type\nmodel = NuSVR(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = make_regression(rng=123) # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew, _ = make_regression(3, rng=123)\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 0.2008156459920009\n 0.1131520519131709\n -0.2076156254934889\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = NuSVR(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element Vector{Float64}:\n 1.1211558175964662\n 0.06677125944808422\n -0.6817578942749346\n```\n\nSee also [`EpsilonSVR`](@ref), [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "NuSVR" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "Truncated SVD dimensionality reduction" -":name" = "TSVDTransformer" -":human_name" = "truncated SVD transformer" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] -":hyperparameters" = "`(:nvals, :maxiter, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Union{Int64, Random.AbstractRNG}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[GLM.LinearBinaryClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" +[MLJLIBSVMInterface.NuSVC] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`"
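The `nu` documented for `NuSVR` above and `NuSVC` below carries the same guarantee: an upper bound on the fraction of margin (or training) errors and a lower bound on the fraction of support vectors. A back-of-envelope illustration of those documented bounds:

```julia
# For n training observations and the default nu = 0.5, the documented
# bounds read: at most nu*n margin errors, at least nu*n support vectors.
n, nu = 150, 0.5
max_margin_errors   = floor(Int, nu * n) # ≤ 75
min_support_vectors = ceil(Int, nu * n)  # ≥ 75
```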
+":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.NuSVC" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "ν-support vector classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nNuSVC\n```\n\nA model type for constructing a ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nNuSVC = @load NuSVC pkg=LIBSVM\n```\n\nDo `model = NuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NuSVC(kernel=...)`.\n\nThis model is a re-parameterization of the [`SVC`](@ref) classifier, where `nu` replaces `cost`, and is mathematically equivalent to it. The parameter `nu` allows more direct control over the number of support vectors (see under \"Hyper-parameters\").\n\nThis model always predicts actual class labels. For probabilistic predictions, use instead [`ProbabilisticNuSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. 
Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue 91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0`: cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nNuSVC = @load NuSVC pkg=LIBSVM # model type\nmodel = NuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = NuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\nSee also the classifiers [`SVC`](@ref) and [`LinearSVC`](@ref), [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation 
[documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "NuSVC" ":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "GLM" -":package_license" = "MIT" -":load_path" = "MLJGLMInterface.LinearBinaryClassifier" -":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -":package_url" = "https://github.com/JuliaStats/GLM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLinearBinaryClassifier\n```\n\nA model type for constructing a linear binary classifier, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\n```\n\nDo `model = LinearBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearBinaryClassifier(fit_intercept=...)`.\n\n`LinearBinaryClassifier` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a binary target variable, with a user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor(2)` or `<:Multiclass(2)`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `link=GLM.LogitLink`: The function which links the linear prediction function to the probability of a particular outcome or class. This must have type `GLM.Link01`. Options include `GLM.LogitLink()`, `GLM.ProbitLink()`, `CloglogLink(),`CauchitLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. 
By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features used during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport GLM # namespace must be available\n\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\nclf = LinearBinaryClassifier(fit_intercept=false, link=GLM.ProbitLink())\n\nX, y = @load_crabs\n\nmach = machine(clf, X, y) |> fit!\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\nyhat = predict(mach, Xnew) # probabilistic predictions\npdf(yhat, levels(y)) # probability matrix\np_B = pdf.(yhat, \"B\")\nclass_labels = predict_mode(mach, Xnew)\n\nfitted_params(mach).features\nfitted_params(mach).coef\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearCountRegressor`](@ref)\n""" -":name" = "LinearBinaryClassifier" -":human_name" = "linear binary classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:fit_intercept, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" -":hyperparameter_types" = "`(\"Bool\", \"GLM.Link01\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[GLM.LinearCountRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" +[MLJLIBSVMInterface.SVC] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", 
\"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Count}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Count}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "unknown" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.SVC" +":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "C-support vector classifier" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nSVC\n```\n\nA model type for constructing a C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSVC = @load SVC pkg=LIBSVM\n```\n\nDo `model = SVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SVC(kernel=...)`.\n\nThis model predicts actual class labels. To predict probabilities, use instead [`ProbabilisticSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. 
Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue 91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0`: cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nSVC = @load SVC pkg=LIBSVM # model type\nmodel = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = SVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the classifiers [`ProbabilisticSVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref). 
And see [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "SVC" ":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "GLM" -":package_license" = "MIT" -":load_path" = "MLJGLMInterface.LinearCountRegressor" -":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -":package_url" = "https://github.com/JuliaStats/GLM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" +":supports_class_weights" = "`true`" ":supports_online" = "`false`" -":docstring" = """```\nLinearCountRegressor\n```\n\nA model type for constructing a linear count regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearCountRegressor = @load LinearCountRegressor pkg=GLM\n```\n\nDo `model = LinearCountRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearCountRegressor(fit_intercept=...)`.\n\n`LinearCountRegressor` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a `Count` target variable (non-negative, unbounded integer) with user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Count`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `distribution=Distributions.Poisson()`: The distribution which the residuals/errors of the model should fit.\n * `link=GLM.LogLink()`: The function which links the linear prediction function to the probability of a particular outcome or class. This should be one of the following: `GLM.IdentityLink()`, `GLM.InverseLink()`, `GLM.InverseSquareLink()`, `GLM.LogLink()`, `GLM.SqrtLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. 
Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same Scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport MLJ.Distributions.Poisson\n\n# Generate some data whose target y looks Poisson when conditioned on\n# X:\nN = 10_000\nw = [1.0, -2.0, 3.0]\nmu(x) = exp(w'x) # mean for a log link function\nXmat = rand(N, 3)\nX = MLJ.table(Xmat)\ny = map(1:N) do i\n x = Xmat[i, :]\n rand(Poisson(mu(x)))\nend;\n\nCountRegressor = @load LinearCountRegressor pkg=GLM\nmodel = CountRegressor(fit_intercept=false)\nmach = machine(model, X, y)\nfit!(mach)\n\nXnew = MLJ.table(rand(3, 3))\nyhat = predict(mach, Xnew)\nyhat_point = predict_mean(mach, Xnew)\n\n# get coefficients approximating `w`:\njulia> fitted_params(mach).coef\n3-element Vector{Float64}:\n 0.9969008753103842\n -2.0255901752504775\n 3.014407534033522\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" -":name" = "LinearCountRegressor" -":human_name" = "linear count regressor" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] -":hyperparameters" = "`(:fit_intercept, :distribution, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" -":hyperparameter_types" = "`(\"Bool\", \"Distributions.Distribution\", \"GLM.Link\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" 
-":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[GLM.LinearRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "GLM" -":package_license" = "MIT" -":load_path" = "MLJGLMInterface.LinearRegressor" -":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -":package_url" = "https://github.com/JuliaStats/GLM.jl" +[MLJLIBSVMInterface.OneClassSVM] ":is_wrapper" = "`false`" -":supports_weights" = "`true`" +":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" +":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Unknown}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJLIBSVMInterface.OneClassSVM" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":is_pure_julia" = "`false`" +":human_name" = "one-class support vector machine" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nOneClassSVM\n```\n\nA model type for constructing a one-class support vector machine, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneClassSVM = @load OneClassSVM pkg=LIBSVM\n```\n\nDo `model = OneClassSVM()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneClassSVM(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). 
\n\nThis model is an outlier detection model delivering raw scores based on the decision function of a support vector machine. Like the [`NuSVC`](@ref) classifier, it uses the `nu` re-parameterization of the `cost` parameter appearing in standard support vector classification [`SVC`](@ref).\n\nTo extract normalized scores (\"probabilities\") wrap the model using `ProbabilisticDetector` from [OutlierDetection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For threshold-based classification, wrap the probabilistic model using MLJ's `BinaryThresholdPredictor`. Examples of wrapping appear below.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue 91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0`: cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `transform(mach, Xnew)`: return scores for outlierness, given features `Xnew` having the same scitype as `X` above. The greater the score, the more likely it is an outlier. This score is based on the SVM decision function. For normalized scores, wrap `model` using `ProbabilisticDetector` from OutlierDetection.jl and call `predict` instead, and for threshold-based classification, wrap again using `BinaryThresholdPredictor`. See the examples below.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `orientation`: this equals `1` if the decision function for `libsvm_model` is increasing with increasing outlierness, and `-1` if it is decreasing instead. 
Correspondingly, the `libsvm_model` attaches `true` to outliers in the first case, and `false` in the second. (The `scores` given in the MLJ report and generated by `MLJ.transform` already correct for this ambiguity, which is therefore only an issue for users directly accessing `libsvm_model`.)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Generating raw scores for outlierness\n\n```\nusing MLJ\nimport LIBSVM\nimport StableRNGs.StableRNG\n\nOneClassSVM = @load OneClassSVM pkg=LIBSVM # model type\nmodel = OneClassSVM(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nrng = StableRNG(123)\nXmatrix = randn(rng, 5, 3)\nXmatrix[1, 1] = 100.0\nX = MLJ.table(Xmatrix)\n\nmach = machine(model, X) |> fit!\n\n# training scores (outliers have larger scores):\njulia> report(mach).scores\n5-element Vector{Float64}:\n 6.711689156091755e-7\n -6.740101976655081e-7\n -6.711632439648446e-7\n -6.743015858874887e-7\n -6.745393717880104e-7\n\n# scores for new data:\nXnew = MLJ.table(rand(rng, 2, 3))\n\njulia> transform(mach, rand(rng, 2, 3))\n2-element Vector{Float64}:\n -6.746293022511047e-7\n -6.744289265348623e-7\n```\n\n## Generating probabilistic predictions of outlierness\n\nContinuing the previous example:\n\n```\nusing OutlierDetection\npmodel = ProbabilisticDetector(model)\npmach = machine(pmodel, X) |> fit!\n\n# probabilistic predictions on new data:\n\njulia> y_prob = predict(pmach, Xnew)\n2-element UnivariateFiniteVector{OrderedFactor{2}, String, UInt8, Float64}:\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>9.57e-5)\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>0.0)\n\n# probabilities for outlierness:\n\njulia> pdf.(y_prob, \"outlier\")\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n\n# raw scores are still available using `transform`:\n\njulia> transform(pmach, Xnew)\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n```\n\n## Outlier classification using a probability threshold\n\nContinuing the previous example:\n\n```\ndmodel = BinaryThresholdPredictor(pmodel, threshold=0.9)\ndmach = machine(dmodel, X) |> fit!\n\njulia> yhat = predict(dmach, Xnew)\n2-element CategoricalArrays.CategoricalArray{String,1,UInt8}:\n \"normal\"\n \"normal\"\n```\n\n## User-defined kernels\n\nContinuing the first example:\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = OneClassSVM(kernel=k)\nmach = machine(model, X) |> fit!\n\njulia> yhat = transform(mach, Xnew)\n2-element Vector{Float64}:\n -0.4825363352732942\n -0.4848772169720227\n```\n\nSee also [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README). 
For an alternative source of outlier detection models with an MLJ interface, see [OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/mpastell/LIBSVM.jl" +":package_name" = "LIBSVM" +":name" = "OneClassSVM" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=GLM\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(fit_intercept=...)`.\n\n`LinearRegressor` assumes the target is a continuous variable whose conditional distribution is normal with constant variance, and whose expected value is a linear combination of the features (identity link function). Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n * `w`: is a vector of `Real` per-observation weights\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `dropcollinear=false`: Whether to drop features in the training data to ensure linear independence. If true , only the first of each set of linearly-dependent features is used. The coefficient for redundant linearly dependent features is `0.0` and all associated statistics are set to `NaN`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same Scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nWhen all keys are enabled in `report_keys`, the following fields are available in `report(mach)`:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. 
For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nLinearRegressor = @load LinearRegressor pkg=GLM\nglm = LinearRegressor()\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(glm, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\nyhat_point = predict_mean(mach, Xnew) # new predictions\n\nfitted_params(mach).features\nfitted_params(mach).coef # x1, x2, intercept\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearCountRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" -":name" = "LinearRegressor" -":human_name" = "linear regressor" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] -":hyperparameters" = "`(:fit_intercept, :dropcollinear, :offsetcol, :report_keys)`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Symbol}\", \"Union{Nothing, AbstractVector{Symbol}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" [MLJFlux.EntityEmbedder] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{MLJFlux.MLJFluxDeterministic, MLJFlux.MLJFluxProbabilistic}\",)`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing,)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJFlux" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "unknown" +":prediction_type" = ":unknown" ":load_path" = "MLJFlux.EntityEmbedder" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameters" = "`(:model,)`" +":is_pure_julia" = "`true`" +":human_name" = "entity embedder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nEntityEmbedder(; 
model=supervised_mljflux_model)\n```\n\nWrapper for an MLJFlux supervised model, to convert it to a transformer. Such transformers are still presented with a target variable in training, but they behave as transformers in MLJ pipelines. They are entity embedding transformers, in the sense of the article, \"Entity Embeddings of Categorical Variables\" by Cheng Guo, Felix Berkhahn.\n\n# Training data\n\nIn MLJ (or MLJBase), bind an instance `embed_model` to data with\n\n```\nmach = machine(embed_model, X, y)\n```\n\nHere:\n\n * `embed_model` is an instance of `EntityEmbedder`, which wraps a supervised MLJFlux model, `model`, which must be an instance of one of these: `MLJFlux.NeuralNetworkClassifier`, `NeuralNetworkBinaryClassifier`, `MLJFlux.NeuralNetworkRegressor`, `MLJFlux.MultitargetNeuralNetworkRegressor`.\n * `X` is any table of input features supported by the model being wrapped. Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n * `y` is the target, which can be any `AbstractVector` supported by the model being wrapped.\n\nTrain the machine using `fit!(mach)`.\n\n# Examples\n\nIn the following example we wrap a `NeuralNetworkClassifier` as an `EntityEmbedder`, so that it can be used to supply continuously encoded features to a nearest neighbor model, which does not support categorical features.\n\n```julia\nusing MLJ\n\n# Setup some data\nN = 400\nX = (\n a = rand(Float32, N),\n b = categorical(rand(\"abcde\", N)),\n c = categorical(rand(\"ABCDEFGHIJ\", N), ordered = true),\n)\n\ny = categorical(rand(\"YN\", N));\n\n# Initiate model\nEntityEmbedder = @load EntityEmbedder pkg=MLJFlux\n\n# Flux model to learn the entity embeddings:\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n\n# Other supervised model type, requiring `Continuous` features:\nKNNClassifier = @load KNNClassifier pkg=NearestNeighborModels\n\n# Instantiate the models:\nclf = NeuralNetworkClassifier(embedding_dims=Dict(:b => 2, :c => 3))\nemb = EntityEmbedder(clf)\n\n# For illustrative purposes, train the embedder on its own:\nmach = machine(emb, X, y)\nfit!(mach)\nXnew = transform(mach, X)\n\n# And compare feature scitypes:\nschema(X)\nschema(Xnew)\n\n# Now construct the pipeline:\npipe = emb |> KNNClassifier()\n\n# And train it to make predictions:\nmach = machine(pipe, X, y)\nfit!(mach)\npredict(mach, X)[1:3]\n```\n\nIt is to be emphasized that the `NeuralNetworkClassifier` is only being used to learn entity embeddings, not to make predictions, which here are made by `KNNClassifier()`.\n\nSee also [`NeuralNetworkClassifier`, `NeuralNetworkRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/FluxML/MLJFlux.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" +":package_name" = "MLJFlux" +":name" = "EntityEmbedder" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nEntityEmbedder(; model=mljflux_neural_model)\n```\n\n`EntityEmbedder` implements entity embeddings as in the \"Entity Embeddings of Categorical Variables\" paper by Cheng Guo, Felix Berkhahn.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features supported by the model being wrapped. Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n * `y` is the target, which can be any `AbstractVector` supported by the model being wrapped.\n\nTrain the machine using `fit!(mach)`.\n\n# Hyper-parameters\n\n * `model`: The supervised MLJFlux neural network model to be used for entity embedding. This must be one of these: `MLJFlux.NeuralNetworkClassifier`, `NeuralNetworkBinaryClassifier`, `MLJFlux.NeuralNetworkRegressor`,`MLJFlux.MultitargetNeuralNetworkRegressor`. The selected model may have hyperparameters that may affect embedding performance, the most notable of which could be the `builder` argument.\n\n# Operations\n\n * `transform(mach, Xnew)`: Transform the categorical features of `Xnew` into dense `Continuous` vectors using the trained `MLJFlux.EntityEmbedderLayer` layer present in the network. Check relevant documentation [here](https://fluxml.ai/MLJFlux.jl/dev/) and in particular, the `embedding_dims` hyperparameter.\n\n# Examples\n\n```julia\nusing MLJ\nusing CategoricalArrays\n\n# Setup some data\nN = 200\nX = (;\n Column1 = repeat(Float32[1.0, 2.0, 3.0, 4.0, 5.0], Int(N / 5)),\n Column2 = categorical(repeat(['a', 'b', 'c', 'd', 'e'], Int(N / 5))),\n Column3 = categorical(repeat([\"b\", \"c\", \"d\", \"f\", \"f\"], Int(N / 5)), ordered = true),\n Column4 = repeat(Float32[1.0, 2.0, 3.0, 4.0, 5.0], Int(N / 5)),\n Column5 = randn(Float32, N),\n Column6 = categorical(\n repeat([\"group1\", \"group1\", \"group2\", \"group2\", \"group3\"], Int(N / 5)),\n ),\n)\ny = categorical([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) # Classification\n\n# Initiate model\nEntityEmbedder = @load EntityEmbedder pkg=MLJFlux\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n\nclf = NeuralNetworkClassifier(embedding_dims=Dict(:Column2 => 2, :Column3 => 2))\n\nemb = EntityEmbedder(clf)\n\n# Construct machine\nmach = machine(emb, X, y)\n\n# Train model\nfit!(mach)\n\n# Transform data using model to encode categorical columns\nXnew = transform(mach, X)\nXnew\n```\n\nSee also [`NeuralNetworkClassifier`, `NeuralNetworkRegressor`](@ref)\n""" -":name" = "EntityEmbedder" -":human_name" = "entity embedder" -":is_supervised" = "`false`" -":prediction_type" = ":unknown" -":abstract_type" = "`MLJModelInterface.Unsupervised`" ":implemented_methods" = [":fit", ":fitted_params", ":training_losses", ":transform"] -":hyperparameters" = "`(:model,)`" -":hyperparameter_types" = "`(\"Union{MLJFlux.MLJFluxDeterministic, MLJFlux.MLJFluxProbabilistic}\",)`" -":hyperparameter_ranges" = "`(nothing,)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`true`" [MLJFlux.MultitargetNeuralNetworkRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", 
\"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" -":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJFlux" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "MLJFlux.MultitargetNeuralNetworkRegressor" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameters" = "`(:builder, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":is_pure_julia" = "`true`" +":human_name" = "multitarget neural network regressor" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```\nMultitargetNeuralNetworkRegressor\n```\n\nA model type for constructing a multitarget neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = MultitargetNeuralNetworkRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetNeuralNetworkRegressor(builder=...)`.\n\n`MultitargetNeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a multi-valued `Continuous` target, represented as a table, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. 
If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any table or matrix of output targets whose element scitype is `Continuous`; check column scitypes with `schema(y)`. If `y` is a `Matrix`, it is assumed to have columns corresponding to variables and rows corresponding to observations.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `Linear`, `Short`, and `MLP`. See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. 
For Training on GPU, use `CUDALibs()`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we apply a multi-target regression model to synthetic data:\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\nimport Optimisers\n```\n\nFirst, we generate some synthetic data (needs MLJBase 0.20.16 or higher):\n\n```julia\nX, y = make_regression(100, 9; n_targets = 2) # both tables\nschema(y)\nschema(X)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. In the following `@builder` call, `n_in` is a proxy for the number input features and `n_out` the number of target variables (both known at `fit!` time), while `rng` is a proxy for a RNG (which will be passed from the `rng` field of `model` defined below).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating the regression model:\n\n```julia\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor\nmodel = MultitargetNeuralNetworkRegressor(builder=builder, rng=123, epochs=20)\n```\n\nWe will arrange for standardization of the the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, transformer=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. 
We can also see the losses in the output of `report(mach)`:\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\nFor experimenting with the learning rate, see the [`NeuralNetworkRegressor`](@ref) example.\n\n```julia\npipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(0.0001)\n```\n\nWith the learning rate fixed, we can now compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n\n# CV estimate, based on `(X, y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=multitarget_l2)\n\n# loss for `(Xtest, ytest)`:\nfit!(mach) # trains on all data `(X, y)`\nyhat = predict(mach, Xtest)\nmultitarget_l2(yhat, ytest)\n```\n\nSee also [`NeuralNetworkRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJFlux" +":name" = "MultitargetNeuralNetworkRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nMultitargetNeuralNetworkRegressor\n```\n\nA model type for constructing a multitarget neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = MultitargetNeuralNetworkRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetNeuralNetworkRegressor(builder=...)`.\n\n`MultitargetNeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a multi-valued `Continuous` target, represented as a table, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any table or matrix of output targets whose element scitype is `Continuous`; check column scitypes with `schema(y)`. 
If `y` is a `Matrix`, it is assumed to have columns corresponding to variables and rows corresponding to observations.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `Linear`, `Short`, and `MLP`. See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. 
Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we apply a multi-target regression model to synthetic data:\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\nimport Optimisers\n```\n\nFirst, we generate some synthetic data (needs MLJBase 0.20.16 or higher):\n\n```julia\nX, y = make_regression(100, 9; n_targets = 2) # both tables\nschema(y)\nschema(X)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. In the following `@builder` call, `n_in` is a proxy for the number input features and `n_out` the number of target variables (both known at `fit!` time), while `rng` is a proxy for a RNG (which will be passed from the `rng` field of `model` defined below).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating the regression model:\n\n```julia\nMultitargetNeuralNetworkRegressor = @load MultitargetNeuralNetworkRegressor\nmodel = MultitargetNeuralNetworkRegressor(builder=builder, rng=123, epochs=20)\n```\n\nWe will arrange for standardization of the the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, transformer=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. 
We can also see the losses in the output of `report(mach)`\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\nFor experimenting with learning rate, see the [`NeuralNetworkRegressor`](@ref) example.\n\n```\npipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(0.0001)\n```\n\nWith the learning rate fixed, we can now compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n\n# CV estimate, based on `(X, y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=multitarget_l2)\n\n# loss for `(Xtest, test)`:\nfit!(mach) # trains on all data `(X, y)`\nyhat = predict(mach, Xtest)\nmultitarget_l2(yhat, ytest)\n```\n\nSee also [`NeuralNetworkRegressor`](@ref)\n""" -":name" = "MultitargetNeuralNetworkRegressor" -":human_name" = "multitarget neural network regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:builder, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":epochs" +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":target_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`(:optimiser, :builder)`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJFlux.NeuralNetworkClassifier] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = 
"`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJFlux" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "MLJFlux.NeuralNetworkClassifier" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":is_pure_julia" = "`true`" +":human_name" = "neural network classifier" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```\nNeuralNetworkClassifier\n```\n\nA model type for constructing a neural network classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkClassifier(builder=...)`.\n\n`NeuralNetworkClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a `Multiclass` or `OrderedFactor` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass` or `OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. 
Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser=Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n   * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n   * `Flux.logitcrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n   * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n   * `Flux.focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n  Currently MLJ measures are not supported as values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: The batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. 
Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing if the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the Iris dataset. This is a very basic example, using a default builder and no standardization. For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ\nusing Flux\nimport RDatasets\nimport Optimisers\n```\n\nFirst, we can load the data:\n\n```julia\niris = RDatasets.dataset(\"datasets\", \"iris\");\ny, X = unpack(iris, ==(:Species), rng=123); # a vector and a table\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\nclf = NeuralNetworkClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(clf, X, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimiser_changes_trigger_retraining` is `false` (the default). 
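For contrast, here is a minimal sketch of the non-default behaviour (illustration only; it is not part of the running example, which assumes the default setting):

```julia
# with this flag set, any change to `clf.optimiser` causes the next
# `fit!(mach)` call to retrain the network from scratch:
clf.optimiser_changes_trigger_retraining = true
```
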
Here, we also change the number of (total) iterations:\n\n```julia\nclf.optimiser = Optimisers.Adam(clf.optimiser.eta * 2)\nclf.epochs = clf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X), y)\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(clf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(clf, X, y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy)\nusing Plots\nplot(curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\")\n\n```\n\nSee also [`ImageClassifier`](@ref), [`NeuralNetworkBinaryClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJFlux" +":name" = "NeuralNetworkClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nNeuralNetworkClassifier\n```\n\nA model type for constructing a neural network classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkClassifier(builder=...)`.\n\n`NeuralNetworkClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a `Multiclass` or `OrderedFactor` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. 
If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass` or `OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n * `Flux.logitcrossentopy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights.] Typically, batch size is between `8` and `512`. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). 
Defaults to `Flux.softmax`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the Iris dataset. This is a very basic example, using a default builder and no standardization. For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ\nusing Flux\nimport RDatasets\nimport Optimisers\n```\n\nFirst, we can load the data:\n\n```julia\niris = RDatasets.dataset(\"datasets\", \"iris\");\ny, X = unpack(iris, ==(:Species), rng=123); # a vector and a table\nNeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux\nclf = NeuralNetworkClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(clf, X, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimizer_changes_trigger_retraining` is `false` (the default). 
Here, we also change the number of (total) iterations:\n\n```julia\nclf.optimiser = Optimisers.Adam(clf.optimiser.eta * 2)\nclf.epochs = clf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X), y)\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(clf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(clf, X, y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy)\nusing Plots\nplot(curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\")\n\n```\n\nSee also [`ImageClassifier`](@ref), [`NeuralNetworkBinaryClassifier`](@ref).\n""" -":name" = "NeuralNetworkClassifier" -":human_name" = "neural network classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":epochs" +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`(:optimiser, :builder)`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJFlux.ImageClassifier] -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Image}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Multiclass}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Image}, AbstractVector{<:ScientificTypesBase.Multiclass}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Multiclass}}`" -":transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJFlux" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "MLJFlux.ImageClassifier" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration)`" +":is_pure_julia" = "`true`" +":human_name" = "image classifier" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```\nImageClassifier\n```\n\nA model type for constructing an image classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nImageClassifier = @load ImageClassifier pkg=MLJFlux\n```\n\nDo `model = ImageClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ImageClassifier(builder=...)`.\n\n`ImageClassifier` classifies images using a neural network adapted to the type of images provided (color or gray scale). Predictions are probabilistic. Users provide a recipe for constructing the network, based on properties of the image encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any `AbstractVector` of images with `ColorImage` or `GrayImage` scitype; check the scitype with `scitype(X)` and refer to ScientificTypes.jl documentation on coercing typical image formats into an appropriate type.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder`: An MLJFlux builder that constructs the neural network. The fallback builds a depth-16 VGG architecture adapted to the image size and number of target classes, with no batch normalization; see the Metalhead.jl documentation for details. See the example below for a user-specified builder. A convenience macro `@builder` is also available. See also `finaliser` below.\n * `optimiser=Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n   * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n   * `Flux.logitcrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. 
You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n   * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n   * `Flux.focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n  Currently MLJ measures are not supported as values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: The batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we use MLJFlux and a custom builder to classify the MNIST image dataset.\n\n```julia\nusing MLJ\nusing Flux\nimport MLJFlux\nimport Optimisers\nimport MLJIteration # for `skip` control\n```\n\nFirst we want to download the MNIST dataset, and unpack into images and labels:\n\n```julia\nimport MLDatasets: MNIST\ndata = MNIST(split=:train)\nimages, labels = data.features, data.targets\n```\n\nIn MLJ, integers cannot be used for encoding categorical data, so we must coerce them into the `Multiclass` scitype:\n\n```julia\nlabels = coerce(labels, Multiclass);\n```\n\nAbove `images` is a single array but MLJFlux requires the images to be a vector of individual image arrays:\n\n```julia\nimages = coerce(images, GrayImage);\nimages[1]\n```\n\nWe start by defining a suitable `builder` object. This is a recipe for building the neural network. 
Our builder will work for images of any (constant) size, whether they be color or black and white (ie, single or multi-channel). The architecture always consists of six alternating convolution and max-pool layers, and a final dense layer; the filter size and the number of channels after each convolution layer are customizable.\n\n```julia\nimport MLJFlux\n\nstruct MyConvBuilder\n filter_size::Int\n channels1::Int\n channels2::Int\n channels3::Int\nend\n\nmake2d(x::AbstractArray) = reshape(x, :, size(x)[end])\n\nfunction MLJFlux.build(b::MyConvBuilder, rng, n_in, n_out, n_channels)\n k, c1, c2, c3 = b.filter_size, b.channels1, b.channels2, b.channels3\n mod(k, 2) == 1 || error(\"`filter_size` must be odd.\")\n p = div(k - 1, 2) # padding to preserve image size\n init = Flux.glorot_uniform(rng)\n front = Chain(\n Conv((k, k), n_channels => c1, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c1 => c2, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c2 => c3, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n make2d)\n d = Flux.outputsize(front, (n_in..., n_channels, 1)) |> first\n return Chain(front, Dense(d, n_out, init=init))\nend\n```\n\nIt is important to note that in our `build` function, there is no final `softmax`. This is applied by default in all MLJFlux classifiers (override this using the `finaliser` hyperparameter).\n\nNow that our builder is defined, we can instantiate the actual MLJFlux model. If you have a GPU, you can substitute in `acceleration=CUDALibs()` below to speed up training.\n\n```julia\nImageClassifier = @load ImageClassifier pkg=MLJFlux\nclf = ImageClassifier(builder=MyConvBuilder(3, 16, 32, 32),\n batch_size=50,\n epochs=10,\n rng=123)\n```\n\nYou can add Flux options such as `optimiser` and `loss` in the snippet above. 
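For example, one might write (a sketch only; the optimiser and loss shown are arbitrary choices for illustration, not recommended defaults):

```julia
clf = ImageClassifier(builder=MyConvBuilder(3, 16, 32, 32),
                      batch_size=50,
                      epochs=10,
                      rng=123,
                      optimiser=Optimisers.Adam(0.001),  # illustrative learning rate
                      loss=Flux.crossentropy)            # the default loss, stated explicitly
```
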
Currently, `loss` must be a Flux-compatible loss, and not an MLJ measure.\n\nNext, we can bind the model with the data in a machine, and train using the first 500 images:\n\n```julia\nmach = machine(clf, images, labels);\nfit!(mach, rows=1:500, verbosity=2);\nreport(mach)\nchain = fitted_params(mach).chain\nFlux.params(chain)[2]\n```\n\nWe can tack on 20 more epochs by modifying the `epochs` field, and iteratively fit some more:\n\n```julia\nclf.epochs = clf.epochs + 20\nfit!(mach, rows=1:500, verbosity=2);\n```\n\nWe can also make predictions and calculate an out-of-sample loss estimate, using any MLJ measure (loss/score):\n\n```julia\npredicted_labels = predict(mach, rows=501:1000);\ncross_entropy(predicted_labels, labels[501:1000])\n```\n\nThe preceding `fit!`/`predict`/evaluate workflow can be alternatively executed as follows:\n\n```julia\nevaluate!(mach,\n resampling=Holdout(fraction_train=0.5),\n measure=cross_entropy,\n rows=1:1000,\n verbosity=0)\n```\n\nSee also [`NeuralNetworkClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJFlux" +":name" = "ImageClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nImageClassifier\n```\n\nA model type for constructing a image classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nImageClassifier = @load ImageClassifier pkg=MLJFlux\n```\n\nDo `model = ImageClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ImageClassifier(builder=...)`.\n\n`ImageClassifier` classifies images using a neural network adapted to the type of images provided (color or gray scale). Predictions are probabilistic. Users provide a recipe for constructing the network, based on properties of the image encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any `AbstractVector` of images with `ColorImage` or `GrayImage` scitype; check the scitype with `scitype(X)` and refer to ScientificTypes.jl documentation on coercing typical image formats into an appropriate type.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder`: An MLJFlux builder that constructs the neural network. The fallback builds a depth-16 VGG architecture adapted to the image size and number of target classes, with no batch normalization; see the Metalhead.jl documentation for details. See the example below for a user-specified builder. A convenience macro `@builder` is also available. See also `finaliser` below.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.crossentropy`: The loss function which the network will optimize. 
Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.crossentropy`: Standard multiclass classification loss, also known as the log loss.\n * `Flux.logitcrossentopy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `softmax` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default softmax finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between 8 and\n\n 512. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a\n\n GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.softmax`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.softmax`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we use MLJFlux and a custom builder to classify the MNIST image dataset.\n\n```julia\nusing MLJ\nusing Flux\nimport MLJFlux\nimport Optimisers\nimport MLJIteration # for `skip` control\n```\n\nFirst we want to download the MNIST dataset, and unpack into images and labels:\n\n```julia\nimport MLDatasets: MNIST\ndata = MNIST(split=:train)\nimages, labels = data.features, data.targets\n```\n\nIn MLJ, integers cannot be used for encoding categorical data, so we must coerce them into the `Multiclass` scitype:\n\n```julia\nlabels = coerce(labels, Multiclass);\n```\n\nAbove `images` is a single array but MLJFlux requires the images to be a vector of individual image arrays:\n\n```\nimages = coerce(images, GrayImage);\nimages[1]\n```\n\nWe start by defining a suitable `builder` object. This is a recipe for building the neural network. Our builder will work for images of any (constant) size, whether they be color or black and white (ie, single or multi-channel). The architecture always consists of six alternating convolution and max-pool layers, and a final dense layer; the filter size and the number of channels after each convolution layer is customizable.\n\n```julia\nimport MLJFlux\n\nstruct MyConvBuilder\n filter_size::Int\n channels1::Int\n channels2::Int\n channels3::Int\nend\n\nmake2d(x::AbstractArray) = reshape(x, :, size(x)[end])\n\nfunction MLJFlux.build(b::MyConvBuilder, rng, n_in, n_out, n_channels)\n k, c1, c2, c3 = b.filter_size, b.channels1, b.channels2, b.channels3\n mod(k, 2) == 1 || error(\"`filter_size` must be odd. \")\n p = div(k - 1, 2) # padding to preserve image size\n init = Flux.glorot_uniform(rng)\n front = Chain(\n Conv((k, k), n_channels => c1, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c1 => c2, pad=(p, p), relu, init=init),\n MaxPool((2, 2)),\n Conv((k, k), c2 => c3, pad=(p, p), relu, init=init),\n MaxPool((2 ,2)),\n make2d)\n d = Flux.outputsize(front, (n_in..., n_channels, 1)) |> first\n return Chain(front, Dense(d, n_out, init=init))\nend\n```\n\nIt is important to note that in our `build` function, there is no final `softmax`. This is applied by default in all MLJFlux classifiers (override this using the `finaliser` hyperparameter).\n\nNow that our builder is defined, we can instantiate the actual MLJFlux model. If you have a GPU, you can substitute in `acceleration=CUDALibs()` below to speed up training.\n\n```julia\nImageClassifier = @load ImageClassifier pkg=MLJFlux\nclf = ImageClassifier(builder=MyConvBuilder(3, 16, 32, 32),\n batch_size=50,\n epochs=10,\n rng=123)\n```\n\nYou can add Flux options such as `optimiser` and `loss` in the snippet above. 
Currently, `loss` must be a flux-compatible loss, and not an MLJ measure.\n\nNext, we can bind the model with the data in a machine, and train using the first 500 images:\n\n```julia\nmach = machine(clf, images, labels);\nfit!(mach, rows=1:500, verbosity=2);\nreport(mach)\nchain = fitted_params(mach)\nFlux.params(chain)[2]\n```\n\nWe can tack on 20 more epochs by modifying the `epochs` field, and iteratively fit some more:\n\n```julia\nclf.epochs = clf.epochs + 20\nfit!(mach, rows=1:500, verbosity=2);\n```\n\nWe can also make predictions and calculate an out-of-sample loss estimate, using any MLJ measure (loss/score):\n\n```julia\npredicted_labels = predict(mach, rows=501:1000);\ncross_entropy(predicted_labels, labels[501:1000])\n```\n\nThe preceding `fit!`/`predict`/evaluate workflow can be alternatively executed as follows:\n\n```julia\nevaluate!(mach,\n resampling=Holdout(fraction_train=0.5),\n measure=cross_entropy,\n rows=1:1000,\n verbosity=0)\n```\n\nSee also [`NeuralNetworkClassifier`](@ref).\n""" -":name" = "ImageClassifier" -":human_name" = "image classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":epochs" +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Multiclass}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Multiclass}`" ":supports_training_losses" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`(:optimiser, :builder)`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Image}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJFlux.NeuralNetworkBinaryClassifier] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Binary}}`" -":predict_scitype" = 
"`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJFlux" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" +":prediction_type" = ":probabilistic" ":load_path" = "MLJFlux.NeuralNetworkBinaryClassifier" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":is_pure_julia" = "`true`" +":human_name" = "neural network binary classifier" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```\nNeuralNetworkBinaryClassifier\n```\n\nA model type for constructing a neural network binary classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nNeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkBinaryClassifier(builder=...)`.\n\n`NeuralNetworkBinaryClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a binary (`Multiclass{2}` or `OrderedFactor{2}`) target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass{2}` or `OrderedFactor{2}`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser::Flux.Adam()`: A `Flux.Optimise` optimiser. 
The optimiser performs the updating of the weights of the network. For further reference, see [the Flux optimiser documentation](https://fluxml.ai/Flux.jl/stable/training/optimisers/). To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.binarycrossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.binarycrossentropy`: Standard binary classification loss, also known as the log loss.\n * `Flux.logitbinarycrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `σ` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default sigmoid finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.binary_focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported as values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: The batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For training on a GPU, use `CUDALibs()`.\n * `finaliser=Flux.σ`: The final activation function of the neural network (applied after the network defined by `builder`). Defaults to `Flux.σ`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. 
Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the mtcars dataset. This is a very basic example, using a default builder and no standardization. For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ, Flux\nimport Optimisers\nimport RDatasets\n```\n\nFirst, we can load the data:\n\n```julia\nmtcars = RDatasets.dataset(\"datasets\", \"mtcars\");\ny, X = unpack(mtcars, ==(:VS), in([:MPG, :Cyl, :Disp, :HP, :WT, :QSec]));\n```\n\nNote that `y` is a vector and `X` a table.\n\n```julia\ny = categorical(y) # classifier takes categorical input\nX_f32 = Float32.(X) # To match floating point type of the neural network layers\nNeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux\nbclf = NeuralNetworkBinaryClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(bclf, X_f32, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimiser_changes_trigger_retraining` is `false` (the default). 
Here, we also change the number of (total) iterations:\n\n```julia-repl\njulia> bclf.optimiser\nAdam(0.001, (0.9, 0.999), 1.0e-8)\n```\n\n```julia\nbclf.optimiser = Optimisers.Adam(eta = bclf.optimiser.eta * 2)\nbclf.epochs = bclf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X_f32), y)\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(bclf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(\n bclf,\n X_f32,\n y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy,\n)\nusing Plots\nplot(\n curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\",\n)\n\n```\n\nSee also [`ImageClassifier`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJFlux" +":name" = "NeuralNetworkBinaryClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nNeuralNetworkBinaryClassifier\n```\n\nA model type for constructing a neural network binary classifier, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nNeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkBinaryClassifier(builder=...)`.\n\n`NeuralNetworkBinaryClassifier` is for training a data-dependent Flux.jl neural network for making probabilistic predictions of a binary (`Multiclass{2}` or `OrderedFactor{2}`) target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. 
If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass{2}` or `OrderedFactor{2}`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` below.\n * `optimiser::Flux.Adam()`: A `Flux.Optimise` optimiser. The optimiser performs the updating of the weights of the network. For further reference, see [the Flux optimiser documentation](https://fluxml.ai/Flux.jl/stable/training/optimisers/). To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.binarycrossentropy`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification task, the most natural loss functions are:\n\n * `Flux.binarycrossentropy`: Standard binary classification loss, also known as the log loss.\n * `Flux.logitbinarycrossentropy`: Mathematically equal to crossentropy, but numerically more stable than finalising the outputs with `σ` and then calculating crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's default sigmoid finaliser, and understand that the output of `predict` is then unnormalized (no longer probabilistic).\n * `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives.\n * `Flux.binary_focal_loss`: Used with highly imbalanced data. Weights harder examples more than easier examples.\n\n Currently MLJ measures are not supported values of `loss`.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For Training on GPU, use `CUDALibs()`.\n * `finaliser=Flux.σ`: The final activation function of the neural network (applied after the network defined by `builder`). 
Defaults to `Flux.σ`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network. This includes the final layer specified by `finaliser` (eg, `softmax`).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalised if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a classification model using the Iris dataset. This is a very basic example, using a default builder and no standardization. For a more advanced illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and examples in the MLJFlux.jl documentation.\n\n```julia\nusing MLJ, Flux\nimport Optimisers\nimport RDatasets\n```\n\nFirst, we can load the data:\n\n```julia\nmtcars = RDatasets.dataset(\"datasets\", \"mtcars\");\ny, X = unpack(mtcars, ==(:VS), in([:MPG, :Cyl, :Disp, :HP, :WT, :QSec]));\n```\n\nNote that `y` is a vector and `X` a table.\n\n```julia\ny = categorical(y) # classifier takes catogorical input\nX_f32 = Float32.(X) # To match floating point type of the neural network layers\nNeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux\nbclf = NeuralNetworkBinaryClassifier()\n```\n\nNext, we can train the model:\n\n```julia\nmach = machine(bclf, X_f32, y)\nfit!(mach)\n```\n\nWe can train the model in an incremental fashion, altering the learning rate as we go, provided `optimizer_changes_trigger_retraining` is `false` (the default). 
Here, we also change the number of (total) iterations:\n\n```julia-repl\njulia> bclf.optimiser\nAdam(0.001, (0.9, 0.999), 1.0e-8)\n```\n\n```julia\nbclf.optimiser = Optimisers.Adam(eta = bclf.optimiser.eta * 2)\nbclf.epochs = bclf.epochs + 5\n\nfit!(mach, verbosity=2) # trains 5 more epochs\n```\n\nWe can inspect the mean training loss using the `cross_entropy` function:\n\n```julia\ntraining_loss = cross_entropy(predict(mach, X_f32), y)\n```\n\nAnd we can access the Flux chain (model) using `fitted_params`:\n\n```julia\nchain = fitted_params(mach).chain\n```\n\nFinally, we can see how the out-of-sample performance changes over time, using MLJ's `learning_curve` function:\n\n```julia\nr = range(bclf, :epochs, lower=1, upper=200, scale=:log10)\ncurve = learning_curve(\n bclf,\n X_f32,\n y,\n range=r,\n resampling=Holdout(fraction_train=0.7),\n measure=cross_entropy,\n)\nusing Plots\nplot(\n curve.parameter_values,\n curve.measurements,\n xlab=curve.parameter_name,\n xscale=curve.parameter_scale,\n ylab = \"Cross Entropy\",\n)\n\n```\n\nSee also [`ImageClassifier`](@ref).\n""" -":name" = "NeuralNetworkBinaryClassifier" -":human_name" = "neural network binary classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:builder, :finaliser, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":epochs" +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" ":supports_training_losses" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`(:optimiser, :builder)`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJFlux.NeuralNetworkRegressor] -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = 
"`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "MLJFlux" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" +":prediction_type" = ":deterministic" ":load_path" = "MLJFlux.NeuralNetworkRegressor" -":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":hyperparameters" = "`(:builder, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" +":is_pure_julia" = "`true`" +":human_name" = "neural network regressor" +":is_supervised" = "`true`" +":iteration_parameter" = ":epochs" +":docstring" = """```\nNeuralNetworkRegressor\n```\n\nA model type for constructing a neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkRegressor(builder=...)`.\n\n`NeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a `Continuous` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. 
See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete training dataset.\n * `batch_size::Int=1`: The batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For training on a GPU, use `CUDALibs()`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalized if `lambda != 0`) in historical order, of length `epochs + 1`. 
The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a regression model for the Boston house price dataset.\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\nimport Optimisers\n```\n\nFirst, we load in the data: The `:MEDV` column becomes the target vector `y`, and all remaining columns go into a table `X`, with the exception of `:CHAS`:\n\n```julia\ndata = OpenML.load(531); # Loads from https://www.openml.org/d/531\ny, X = unpack(data, ==(:MEDV), !=(:CHAS); rng=123);\n\nscitype(y)\nschema(X)\n```\n\nSince MLJFlux models do not handle ordered factors, we'll treat `:RAD` as `Continuous`:\n\n```julia\nX = coerce(X, :RAD=>Continuous)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. In the following `@builder` call, `n_in` is a proxy for the number of input features (which will be known at `fit!` time) and `rng` is a proxy for an RNG (which will be passed from the `rng` field of `model` defined below). We also have the parameter `n_out` which is the number of output features. As we are doing single target regression, the value passed will always be `1`, but the builder we define will also work for [`MultitargetNeuralNetworkRegressor`](@ref).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating a model:\n\n```julia\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\nmodel = NeuralNetworkRegressor(\n builder=builder,\n rng=123,\n epochs=20\n)\n```\n\nWe arrange for standardization of the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, transformer=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. 
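As an aside, the nested property path `transformed_target_model_deterministic.model` appearing in the snippets below is the name MLJ auto-generates for the wrapped component of `pipe`, and the same path can be used to mutate the buried model's hyper-parameters. A minimal sketch, assuming `pipe` as constructed above:

```julia
# mutate a hyper-parameter of the model nested inside the pipeline, via
# the auto-generated component name also used in `report(mach)` below:
pipe.transformed_target_model_deterministic.model.epochs = 40
```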
We can also see the losses in the output of `report(mach)`.\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\n## Experimenting with learning rate\n\nWe can visually compare how the learning rate affects the predictions:\n\n```julia\nusing Plots\n\nrates = [5e-5, 1e-4, 0.005, 0.001, 0.05]\nplt=plot()\n\nforeach(rates) do η\n pipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(η)\n fit!(mach, force=true, verbosity=0)\n losses =\n report(mach).transformed_target_model_deterministic.model.training_losses[3:end]\n plot!(1:length(losses), losses, label=η)\nend\n\nplt\n\npipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(0.0001)\n```\n\nWith the learning rate fixed, we compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n# CV estimate, based on `(X, y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=l2)\n\n# loss for `(Xtest, ytest)`:\nfit!(mach) # train on `(X, y)`\nyhat = predict(mach, Xtest)\nl2(yhat, ytest)\n```\n\nThese losses, for the pipeline model, refer to the target on the original, unstandardized, scale.\n\nFor implementing stopping criteria and other iteration controls, refer to examples linked from the MLJFlux documentation.\n\nSee also [`MultitargetNeuralNetworkRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/alan-turing-institute/MLJFlux.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`false`" +":package_name" = "MLJFlux" +":name" = "NeuralNetworkRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nNeuralNetworkRegressor\n```\n\nA model type for constructing a neural network regressor, based on [MLJFlux.jl](https://github.com/alan-turing-institute/MLJFlux.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\n```\n\nDo `model = NeuralNetworkRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `NeuralNetworkRegressor(builder=...)`.\n\n`NeuralNetworkRegressor` is for training a data-dependent Flux.jl neural network to predict a `Continuous` target, given a table of `Continuous` features. Users provide a recipe for constructing the network, based on properties of the data that is encountered, by specifying an appropriate `builder`. See MLJFlux documentation for more on builders.\n\nIn addition to features with `Continuous` scientific element type, this model supports categorical features in the input table. 
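None of the examples above exercise the `embedding_dims` hyper-parameter documented earlier. A minimal, hypothetical sketch: the feature names `:make` and `:colour` are illustrative only, and `NeuralNetworkRegressor` is assumed to have been loaded with `@load` as before:

```julia
# an Int value fixes the embedding dimension exactly; a Float scales it
# relative to the number of levels c of that feature, here ceil(0.5 * c):
model = NeuralNetworkRegressor(
    embedding_dims = Dict(:make => 4, :colour => 0.5),
)
```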
If present, such features are embedded into dense vectors by the use of an additional `EntityEmbedderLayer` layer after the input, as described in Entity Embeddings of Categorical Variables by Cheng Guo, Felix Berkhahn arXiv, 2016.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` provides input features and is either: (i) a `Matrix` with `Continuous` element scitype (typically `Float32`); or (ii) a table of input features (eg, a `DataFrame`) whose columns have `Continuous`, `Multiclass` or `OrderedFactor` element scitype; check column scitypes with `schema(X)`. If any `Multiclass` or `OrderedFactor` features appear, the constructed network will use an `EntityEmbedderLayer` layer to transform them into dense vectors. If `X` is a `Matrix`, it is assumed that columns correspond to features and rows corresponding to observations.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `builder=MLJFlux.Linear(σ=Flux.relu)`: An MLJFlux builder that constructs a neural network. Possible `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See MLJFlux documentation for more on builders, and the example below for using the `@builder` convenience macro.\n * `optimiser::Optimisers.Adam()`: An Optimisers.jl optimiser. The optimiser performs the updating of the weights of the network. To choose a learning rate (the update rate of the optimizer), a good rule of thumb is to start out at `10e-3`, and tune using powers of `10` between `1` and `1e-7`.\n * `loss=Flux.mse`: The loss function which the network will optimize. Should be a function which can be called in the form `loss(yhat, y)`. Possible loss functions are listed in [the Flux loss function documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a regression task, natural loss functions are:\n\n * `Flux.mse`\n * `Flux.mae`\n * `Flux.msle`\n * `Flux.huber_loss`\n\n Currently MLJ measures are not supported as loss functions here.\n * `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents one pass through the complete the training dataset.\n * `batch_size::int=1`: the batch size to be used for training, representing the number of samples per update of the network weights. Typically, batch size is between `8` and `512`. Increasing batch size may accelerate training if `acceleration=CUDALibs()` and a GPU is available.\n * `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value in the range `[0, ∞)`. Note the history reports unpenalized losses.\n * `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0 represents L2 regularization, and a value of 1 represents L1 regularization.\n * `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during training. The default is `Random.default_rng()`.\n * `optimizer_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting a machine if the associated optimiser has changed. If `true`, the associated machine will retrain from scratch on `fit!` call, otherwise it will not.\n * `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. 
For Training on GPU, use `CUDALibs()`.\n * `embedding_dims`: a `Dict` whose keys are names of categorical features, given as symbols, and whose values are numbers representing the desired dimensionality of the entity embeddings of such features: an integer value of `7`, say, sets the embedding dimensionality to `7`; a float value of `0.5`, say, sets the embedding dimensionality to `ceil(0.5 * c)`, where `c` is the number of feature levels. Unspecified feature dimensionality defaults to `min(c - 1, 10)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n * `transform(mach, Xnew)`: Assuming `Xnew` has the same schema as `X`, transform the categorical features of `Xnew` into dense `Continuous` vectors using the `MLJFlux.EntityEmbedderLayer` layer present in the network. Does nothing in case the model was trained on an input `X` that lacks categorical features.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `chain`: The trained \"chain\" (Flux.jl model), namely the series of layers, functions, and activations which make up the neural network.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `training_losses`: A vector of training losses (penalized if `lambda != 0`) in historical order, of length `epochs + 1`. The first element is the pre-training loss.\n\n# Examples\n\nIn this example we build a regression model for the Boston house price dataset.\n\n```julia\nusing MLJ\nimport MLJFlux\nusing Flux\nimport Optimisers\n```\n\nFirst, we load in the data: The `:MEDV` column becomes the target vector `y`, and all remaining columns go into a table `X`, with the exception of `:CHAS`:\n\n```julia\ndata = OpenML.load(531); # Loads from https://www.openml.org/d/531\ny, X = unpack(data, ==(:MEDV), !=(:CHAS); rng=123);\n\nscitype(y)\nschema(X)\n```\n\nSince MLJFlux models do not handle ordered factors, we'll treat `:RAD` as `Continuous`:\n\n```julia\nX = coerce(X, :RAD=>Continuous)\n```\n\nSplitting off a test set:\n\n```julia\n(X, Xtest), (y, ytest) = partition((X, y), 0.7, multi=true);\n```\n\nNext, we can define a `builder`, making use of a convenience macro to do so. In the following `@builder` call, `n_in` is a proxy for the number input features (which will be known at `fit!` time) and `rng` is a proxy for a RNG (which will be passed from the `rng` field of `model` defined below). We also have the parameter `n_out` which is the number of output features. As we are doing single target regression, the value passed will always be `1`, but the builder we define will also work for [`MultitargetNeuralNetworkRegressor`](@ref).\n\n```julia\nbuilder = MLJFlux.@builder begin\n init=Flux.glorot_uniform(rng)\n Chain(\n Dense(n_in, 64, relu, init=init),\n Dense(64, 32, relu, init=init),\n Dense(32, n_out, init=init),\n )\nend\n```\n\nInstantiating a model:\n\n```julia\nNeuralNetworkRegressor = @load NeuralNetworkRegressor pkg=MLJFlux\nmodel = NeuralNetworkRegressor(\n builder=builder,\n rng=123,\n epochs=20\n)\n```\n\nWe arrange for standardization of the the target by wrapping our model in `TransformedTargetModel`, and standardization of the features by inserting the wrapped model in a pipeline:\n\n```julia\npipe = Standardizer |> TransformedTargetModel(model, transformer=Standardizer)\n```\n\nIf we fit with a high verbosity (>1), we will see the losses during training. 
We can also see the losses in the output of `report(mach)`.\n\n```julia\nmach = machine(pipe, X, y)\nfit!(mach, verbosity=2)\n\n# first element initial loss, 2:end per epoch training losses\nreport(mach).transformed_target_model_deterministic.model.training_losses\n```\n\n## Experimenting with learning rate\n\nWe can visually compare how the learning rate affects the predictions:\n\n```julia\nusing Plots\n\nrates = rates = [5e-5, 1e-4, 0.005, 0.001, 0.05]\nplt=plot()\n\nforeach(rates) do η\n pipe.transformed_target_model_deterministic.model.optimiser = Optimisers.Adam(η)\n fit!(mach, force=true, verbosity=0)\n losses =\n report(mach).transformed_target_model_deterministic.model.training_losses[3:end]\n plot!(1:length(losses), losses, label=η)\nend\n\nplt\n\npipe.transformed_target_model_deterministic.model.optimiser.eta = Optimisers.Adam(0.0001)\n```\n\nWith the learning rate fixed, we compute a CV estimate of the performance (using all data bound to `mach`) and compare this with performance on the test set:\n\n```julia\n# CV estimate, based on `(X, y)`:\nevaluate!(mach, resampling=CV(nfolds=5), measure=l2)\n\n# loss for `(Xtest, test)`:\nfit!(mach) # train on `(X, y)`\nyhat = predict(mach, Xtest)\nl2(yhat, ytest)\n```\n\nThese losses, for the pipeline model, refer to the target on the original, unstandardized, scale.\n\nFor implementing stopping criterion and other iteration controls, refer to examples linked from the MLJFlux documentation.\n\nSee also [`MultitargetNeuralNetworkRegressor`](@ref)\n""" -":name" = "NeuralNetworkRegressor" -":human_name" = "neural network regressor" -":is_supervised" = "`true`" -":prediction_type" = ":deterministic" -":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:builder, :optimiser, :loss, :epochs, :batch_size, :lambda, :alpha, :rng, :optimiser_changes_trigger_retraining, :acceleration, :embedding_dims)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = ":epochs" +":deep_properties" = "`(:optimiser, :builder)`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`(:optimiser, :builder)`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`false`" [MLJEnsembles.EnsembleModel] -":input_scitype" = "`ScientificTypesBase.Unknown`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":is_wrapper" = "`true`" +":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Vector{Float64}\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Int64\", \"ComputationalResources.AbstractResource\", \"Any\")`" +":package_uuid" = "50ed68f4-41fd-4504-931a-ed422449fee0" 
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`false`" -":package_name" = "MLJEnsembles" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "unknown" +":prediction_type" = ":probabilistic" ":load_path" = "MLJEnsembles.EnsembleModel" -":package_uuid" = "50ed68f4-41fd-4504-931a-ed422449fee0" +":hyperparameters" = "`(:model, :atomic_weights, :bagging_fraction, :rng, :n, :acceleration, :out_of_bag_measure)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic ensemble model" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nEnsembleModel(model,\n atomic_weights=Float64[],\n bagging_fraction=0.8,\n n=100,\n rng=GLOBAL_RNG,\n acceleration=CPU1(),\n out_of_bag_measure=[])\n```\n\nCreate a model for training an ensemble of `n` clones of `model`, with optional bagging. Ensembling is useful if `fit!(machine(atom, data...))` does not create identical models on repeated calls (ie, is a stochastic model, such as a decision tree with randomized node selection criteria), or if `bagging_fraction` is set to a value less than 1.0, or both.\n\nHere the atomic `model` must support targets with scitype `AbstractVector{<:Finite}` (single-target classifiers) or `AbstractVector{<:Continuous}` (single-target regressors).\n\nIf `rng` is an integer, then `MersenneTwister(rng)` is the random number generator used for bagging. Otherwise some `AbstractRNG` object is expected.\n\nThe atomic predictions are optionally weighted according to the vector `atomic_weights` (to allow for external optimization) except in the case that `model` is a `Deterministic` classifier, in which case `atomic_weights` are ignored.\n\nThe ensemble model is `Deterministic` or `Probabilistic`, according to the corresponding supertype of `atom`. In the case of deterministic classifiers (`target_scitype(atom) <: Abstract{<:Finite}`), the predictions are majority votes, and for regressors (`target_scitype(atom)<: AbstractVector{<:Continuous}`) they are ordinary averages. 
Probabilistic predictions are obtained by averaging the atomic probability distribution/mass functions; in particular, for regressors, the ensemble prediction on each input pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl package, where `D` is the type of predicted distribution for `atom`.\n\nSpecify `acceleration=CPUProcesses()` for distributed computing, or `CPUThreads()` for multithreading.\n\nIf a single measure or non-empty vector of measures is specified by `out_of_bag_measure`, then out-of-bag estimates of performance are written to the training report (call `report` on the trained machine wrapping the ensemble model).\n\n*Important:* If per-observation or class weights `w` (not to be confused with atomic weights) are specified when constructing a machine for the ensemble model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is used by any measures specified in `out_of_bag_measure` that support them.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/MLJEnsembles.jl" -":is_wrapper" = "`true`" -":supports_weights" = "`false`" +":package_name" = "MLJEnsembles" +":name" = "EnsembleModel" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """```\nEnsembleModel(model,\n atomic_weights=Float64[],\n bagging_fraction=0.8,\n n=100,\n rng=GLOBAL_RNG,\n acceleration=CPU1(),\n out_of_bag_measure=[])\n```\n\nCreate a model for training an ensemble of `n` clones of `model`, with optional bagging. Ensembling is useful if `fit!(machine(atom, data...))` does not create identical models on repeated calls (ie, is a stochastic model, such as a decision tree with randomized node selection criteria), or if `bagging_fraction` is set to a value less than 1.0, or both.\n\nHere the atomic `model` must support targets with scitype `AbstractVector{<:Finite}` (single-target classifiers) or `AbstractVector{<:Continuous}` (single-target regressors).\n\nIf `rng` is an integer, then `MersenneTwister(rng)` is the random number generator used for bagging. Otherwise some `AbstractRNG` object is expected.\n\nThe atomic predictions are optionally weighted according to the vector `atomic_weights` (to allow for external optimization) except in the case that `model` is a `Deterministic` classifier, in which case `atomic_weights` are ignored.\n\nThe ensemble model is `Deterministic` or `Probabilistic`, according to the corresponding supertype of `atom`. In the case of deterministic classifiers (`target_scitype(atom) <: Abstract{<:Finite}`), the predictions are majority votes, and for regressors (`target_scitype(atom)<: AbstractVector{<:Continuous}`) they are ordinary averages. 
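The `EnsembleModel` docstring above carries no usage example. The following is a hedged sketch only, assuming MLJDecisionTreeInterface (one of this package's test dependencies) is installed:

```julia
using MLJ
X, y = @load_iris
Tree = @load DecisionTreeClassifier pkg=DecisionTree
# 100 bagged trees, each trained on a random 80% of the rows:
forest = EnsembleModel(model=Tree(), n=100, bagging_fraction=0.8, rng=123)
mach = machine(forest, X, y)
fit!(mach)
predict(mach, X) # probabilistic predictions, averaged over the ensemble
```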
Probabilistic predictions are obtained by averaging the atomic probability distribution/mass functions; in particular, for regressors, the ensemble prediction on each input pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl package, where `D` is the type of predicted distribution for `atom`.\n\nSpecify `acceleration=CPUProcesses()` for distributed computing, or `CPUThreads()` for multithreading.\n\nIf a single measure or non-empty vector of measures is specified by `out_of_bag_measure`, then out-of-bag estimates of performance are written to the training report (call `report` on the trained machine wrapping the ensemble model).\n\n*Important:* If per-observation or class weights `w` (not to be confused with atomic weights) are specified when constructing a machine for the ensemble model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is used by any measures specified in `out_of_bag_measure` that support them.\n""" -":name" = "EnsembleModel" -":human_name" = "probabilistic ensemble model" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [] -":hyperparameters" = "`(:model, :atomic_weights, :bagging_fraction, :rng, :n, :acceleration, :out_of_bag_measure)`" -":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Vector{Float64}\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Int64\", \"ComputationalResources.AbstractResource\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" +":input_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`EnsembleModel`" From 53892ae81dece255b9d1171a8448488349a4c9c7 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Wed, 23 Jul 2025 00:18:02 +1200 Subject: [PATCH 15/22] adjust metadata loading to address use of API pkgs in Metadata.toml keys --- src/metadata.jl | 12 +++++++----- test/registry.jl | 8 +++++++- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/metadata.jl b/src/metadata.jl index e17093d..8fd1a9b 100644 --- a/src/metadata.jl +++ b/src/metadata.jl @@ -61,16 +61,18 @@ end # to define INFO_GIVEN_HANDLE function info_given_handle(metadata_file) metadata = LittleDict(TOML.parsefile(metadata_file)) - metadata_given_pkg = decode_dic(metadata) + metadata_given_api_pkg = decode_dic(metadata) # build info_given_handle dictionary: ret = Dict{Handle}{Any}() - packages = keys(metadata_given_pkg) - for pkg in packages - info_given_name = metadata_given_pkg[pkg] + packages = keys(metadata_given_api_pkg) + for api_pkg in packages + info_given_name = metadata_given_api_pkg[api_pkg] for name in keys(info_given_name) + info = info_given_name[name] + pkg = info[:package_name] handle = Handle(name, pkg) - ret[handle] = info_given_name[name] + ret[handle] = info end end return ret diff --git a/test/registry.jl b/test/registry.jl index 37d17c6..757d3cf 100644 --- a/test/registry.jl +++ b/test/registry.jl @@ -20,6 +20,7 @@ using Suppressor close(stream) registry = dirname(filename) # we need to rename project file to ..../Project.toml mv(filename, joinpath(registry, "Project.toml"); force=true) + rm(joinpath(registry, "Manifest.toml"); force=true) # open a new Julia process in which to activate the registry project and attempt to # load all models: @@ -27,10 +28,13 @@ using Suppressor # define the programs to run in that process: # 1. To instantiate the registry environment: + this_package = joinpath(@__DIR__, "..") program1 = quote using Pkg Pkg.activate($registry) + Pkg.develop(path=$this_package) # MLJModels Pkg.instantiate() + Pkg.status() using MLJModels !isempty(keys(Pkg.dependencies())) end @@ -42,7 +46,9 @@ using Suppressor # remove `@suppress` to debug: @test @suppress remotecall_fetch(Main.eval, id, program1) @info "Attempting to load all MLJ Model Registry models into a Julia process. " - @info "Be patient, this may take five minutes or so..." + @info "Be patient, this may take a few minutes ..." @test @suppress remotecall_fetch(Main.eval, id, program2) rmprocs(id) end + +true From 57161f8934212f59dff0c3cd150a9b14329569cc Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Fri, 22 Aug 2025 13:50:32 +1200 Subject: [PATCH 16/22] rm Transformers.jl; tests still need registry update to pass --- Project.toml | 14 +- src/MLJModels.jl | 1 - src/builtins/Transformers.jl | 2017 --------------------------------- test/GaussianProcesses.jl | 42 - test/NaiveBayes.jl | 112 -- test/builtins/Transformers.jl | 647 ----------- test/model_search.jl | 4 +- test/runtests.jl | 5 +- 8 files changed, 16 insertions(+), 2826 deletions(-) delete mode 100644 src/builtins/Transformers.jl delete mode 100755 test/GaussianProcesses.jl delete mode 100755 test/NaiveBayes.jl delete mode 100644 test/builtins/Transformers.jl diff --git a/Project.toml b/Project.toml index af20ac6..ed3c499 100644 --- a/Project.toml +++ b/Project.toml @@ -38,6 +38,7 @@ Distributions = "0.25" InteractiveUtils = "1" LinearAlgebra = "1" MLJModelInterface = "1.10" +MLJTransforms = "0.1.1" Markdown = "1" OrderedCollections = "1.1" Parameters = "0.12" @@ -58,10 +59,21 @@ Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661" MLJMultivariateStatsInterface = "1b6a4a23-ba22-4f51-9698-8599985d3728" +MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Distributed", "MLJBase", "MLJDecisionTreeInterface", "MLJMultivariateStatsInterface", "Pkg", "StableRNGs", "Suppressor", "Test"] +test = [ + "Distributed", + "MLJBase", + "MLJDecisionTreeInterface", + "MLJMultivariateStatsInterface", + "MLJTransforms", + "Pkg", + "StableRNGs", + "Suppressor", + "Test", +] diff --git a/src/MLJModels.jl b/src/MLJModels.jl index ffaae0c..3f85e81 100755 --- a/src/MLJModels.jl +++ b/src/MLJModels.jl @@ -60,7 +60,6 @@ include("utilities.jl") # load built-in models: include("builtins/Constant.jl") -include("builtins/Transformers.jl") include("builtins/ThresholdPredictors.jl") # declare paths to the metadata and associated project file: diff --git a/src/builtins/Transformers.jl b/src/builtins/Transformers.jl deleted file mode 100644 index fd00d43..0000000 --- a/src/builtins/Transformers.jl +++ /dev/null @@ -1,2017 +0,0 @@ -# Note that doc-strings appear at the end - - -# # IMPUTER - -round_median(v::AbstractVector) = v -> round(eltype(v), median(v)) - -_median(e) = skipmissing(e) |> median -_round_median(e) = skipmissing(e) |> (f -> round(eltype(f), median(f))) -_mode(e) = skipmissing(e) |> mode - -@with_kw_noshow mutable struct UnivariateFillImputer <: Unsupervised - continuous_fill::Function = _median - count_fill::Function = _round_median - finite_fill::Function = _mode -end - -function MMI.fit(transformer::UnivariateFillImputer, - verbosity::Integer, - v) - - filler(v, ::Type) = throw(ArgumentError( - "Imputation is not supported for vectors "* - "of elscitype $(elscitype(v)).")) - filler(v, ::Type{<:Union{Continuous,Missing}}) = - transformer.continuous_fill(v) - filler(v, ::Type{<:Union{Count,Missing}}) = - transformer.count_fill(v) - filler(v, ::Type{<:Union{Finite,Missing}}) = - transformer.finite_fill(v) - - fitresult = (filler=filler(v, elscitype(v)),) - cache = nothing - report = NamedTuple() - - return fitresult, cache, report - -end - -function replace_missing(::Type{<:Finite}, vnew, filler) - all(in(levels(filler)), levels(vnew)) || - error(ArgumentError("The `column::AbstractVector{<:Finite}`"* - " 
to be transformed must contain the same levels"* - " as the categorical value to be imputed")) - replace(vnew, missing => filler) - -end - -function replace_missing(::Type, vnew, filler) - T = promote_type(nonmissing(eltype(vnew)), typeof(filler)) - w_tight = similar(vnew, T) - @inbounds for i in eachindex(vnew) - if ismissing(vnew[i]) - w_tight[i] = filler - else - w_tight[i] = vnew[i] - end - end - return w_tight -end - -function MMI.transform(transformer::UnivariateFillImputer, - fitresult, - vnew) - - filler = fitresult.filler - - scitype(filler) <: elscitype(vnew) || - error("Attempting to impute a value of scitype $(scitype(filler)) "* - "into a vector of incompatible elscitype, namely $(elscitype(vnew)). ") - - if elscitype(vnew) >: Missing - w_tight = replace_missing(nonmissing(elscitype(vnew)), vnew, filler) - else - w_tight = vnew - end - - return w_tight -end - -MMI.fitted_params(::UnivariateFillImputer, fitresult) = fitresult - -@with_kw_noshow mutable struct FillImputer <: Unsupervised - features::Vector{Symbol} = Symbol[] - continuous_fill::Function = _median - count_fill::Function = _round_median - finite_fill::Function = _mode -end - -function MMI.fit(transformer::FillImputer, verbosity::Int, X) - - s = schema(X) - features_seen = s.names |> collect # "seen" = "seen in fit" - scitypes_seen = s.scitypes |> collect - - features = isempty(transformer.features) ? features_seen : - transformer.features - - issubset(features, features_seen) || throw(ArgumentError( - "Some features specified do not exist in the supplied table. ")) - - # get corresponding scitypes: - mask = map(features_seen) do ftr - ftr in features - end - features = @view features_seen[mask] # `features` re-ordered - scitypes = @view scitypes_seen[mask] - features_and_scitypes = zip(features, scitypes) #|> collect - - # now keep those features that are imputable: - function isimputable(ftr, T::Type) - if verbosity > 0 && !isempty(transformer.features) - @info "Feature $ftr will not be imputed "* - "(imputation for $T not supported). " - end - return false - end - isimputable(ftr, ::Type{<:Union{Continuous,Missing}}) = true - isimputable(ftr, ::Type{<:Union{Count,Missing}}) = true - isimputable(ftr, ::Type{<:Union{Finite,Missing}}) = true - - mask = map(features_and_scitypes) do tup - isimputable(tup...) 
- end - features_to_be_imputed = @view features[mask] - - univariate_transformer = - UnivariateFillImputer(continuous_fill=transformer.continuous_fill, - count_fill=transformer.count_fill, - finite_fill=transformer.finite_fill) - univariate_fitresult(ftr) = MMI.fit(univariate_transformer, - verbosity - 1, - selectcols(X, ftr))[1] - - fitresult_given_feature = - Dict(ftr=> univariate_fitresult(ftr) for ftr in features_to_be_imputed) - - fitresult = (features_seen=features_seen, - univariate_transformer=univariate_transformer, - fitresult_given_feature=fitresult_given_feature) - report = NamedTuple() - cache = nothing - - return fitresult, cache, report -end - -function MMI.transform(transformer::FillImputer, fitresult, X) - - features_seen = fitresult.features_seen # seen in fit - univariate_transformer = fitresult.univariate_transformer - fitresult_given_feature = fitresult.fitresult_given_feature - - all_features = Tables.schema(X).names - - # check that no new features have appeared: - all(e -> e in features_seen, all_features) || throw(ArgumentError( - "Attempting to transform table with "* - "feature labels not seen in fit.\n"* - "Features seen in fit = $features_seen.\n"* - "Current features = $([all_features...]). ")) - - features = keys(fitresult_given_feature) - - cols = map(all_features) do ftr - col = MMI.selectcols(X, ftr) - if ftr in features - fr = fitresult_given_feature[ftr] - return transform(univariate_transformer, fr, col) - end - return col - end - - named_cols = NamedTuple{all_features}(tuple(cols...)) - return MMI.table(named_cols, prototype=X) - -end - -function MMI.fitted_params(::FillImputer, fr) - dict = fr.fitresult_given_feature - filler_given_feature = Dict(ftr=>dict[ftr].filler for ftr in keys(dict)) - return (features_seen_in_fit=fr.features_seen, - univariate_transformer=fr.univariate_transformer, - filler_given_feature=filler_given_feature) -end - -## UNIVARIATE DISCRETIZER - -# helper function: -reftype(::CategoricalArray{<:Any,<:Any,R}) where R = R - -@with_kw_noshow mutable struct UnivariateDiscretizer <:Unsupervised - n_classes::Int = 512 -end - -struct UnivariateDiscretizerResult{C} - odd_quantiles::Vector{Float64} - even_quantiles::Vector{Float64} - element::C -end - -function MMI.fit(transformer::UnivariateDiscretizer, verbosity::Int, X) - n_classes = transformer.n_classes - quantiles = quantile(X, Array(range(0, stop=1, length=2*n_classes+1))) - clipped_quantiles = quantiles[2:2*n_classes] # drop 0% and 100% quantiles - - # odd_quantiles for transforming, even_quantiles used for - # inverse_transforming: - odd_quantiles = clipped_quantiles[2:2:(2*n_classes-2)] - even_quantiles = clipped_quantiles[1:2:(2*n_classes-1)] - - # determine optimal reference type for encoding as categorical: - R = reftype(categorical(1:n_classes, compress=true)) - output_prototype = categorical(R(1):R(n_classes), compress=true, ordered=true) - element = output_prototype[1] - - cache = nothing - report = NamedTuple() - - res = UnivariateDiscretizerResult(odd_quantiles, even_quantiles, element) - return res, cache, report -end - -# acts on scalars: -function transform_to_int( - result::UnivariateDiscretizerResult{<:CategoricalValue{R}}, - r::Real) where R - k = oneR = R(1) - @inbounds for q in result.odd_quantiles - if r > q - k += oneR - end - end - return k -end - -# transforming scalars: -MMI.transform(::UnivariateDiscretizer, result, r::Real) = - transform(result.element, transform_to_int(result, r)) - -# transforming vectors: -function 
MMI.transform(::UnivariateDiscretizer, result, v) - w = [transform_to_int(result, r) for r in v] - return transform(result.element, w) -end - -# inverse_transforming raw scalars: -function MMI.inverse_transform( - transformer::UnivariateDiscretizer, result , k::Integer) - k <= transformer.n_classes && k > 0 || - error("Cannot transform an integer outside the range "* - "`[1, n_classes]`, where `n_classes = $(transformer.n_classes)`") - return result.even_quantiles[k] -end - -# inverse transforming a categorical value: -function MMI.inverse_transform( - transformer::UnivariateDiscretizer, result, e::CategoricalValue) - k = CategoricalArrays.DataAPI.unwrap(e) - return inverse_transform(transformer, result, k) -end - -# inverse transforming raw vectors: -MMI.inverse_transform(transformer::UnivariateDiscretizer, result, - w::AbstractVector{<:Integer}) = - [inverse_transform(transformer, result, k) for k in w] - -# inverse transforming vectors of categorical elements: -function MMI.inverse_transform(transformer::UnivariateDiscretizer, result, - wcat::AbstractVector{<:CategoricalValue}) - w = MMI.int(wcat) - return [inverse_transform(transformer, result, k) for k in w] -end - -MMI.fitted_params(::UnivariateDiscretizer, fitresult) = ( - odd_quantiles=fitresult.odd_quantiles, - even_quantiles=fitresult.even_quantiles -) - - -# # CONTINUOUS TRANSFORM OF TIME TYPE FEATURES - -mutable struct UnivariateTimeTypeToContinuous <: Unsupervised - zero_time::Union{Nothing, TimeType} - step::Period -end - -function UnivariateTimeTypeToContinuous(; - zero_time=nothing, step=Dates.Hour(24)) - model = UnivariateTimeTypeToContinuous(zero_time, step) - message = MMI.clean!(model) - isempty(message) || @warn message - return model -end - -function MMI.clean!(model::UnivariateTimeTypeToContinuous) - # Step must be able to be added to zero_time if provided. - msg = "" - if model.zero_time !== nothing - try - tmp = model.zero_time + model.step - catch err - if err isa MethodError - model.zero_time, model.step, status, msg = _fix_zero_time_step( - model.zero_time, model.step) - if status === :error - # Unable to resolve, rethrow original error. - throw(err) - end - else - throw(err) - end - end - end - return msg -end - -function _fix_zero_time_step(zero_time, step) - # Cannot add time parts to dates nor date parts to times. - # If a mismatch is encountered. Conversion from date parts to time parts - # is possible, but not from time parts to date parts because we cannot - # represent fractional date parts. - msg = "" - if zero_time isa Dates.Date && step isa Dates.TimePeriod - # Convert zero_time to a DateTime to resolve conflict. - if step % Hour(24) === Hour(0) - # We can convert step to Day safely - msg = "Cannot add `TimePeriod` `step` to `Date` `zero_time`. Converting `step` to `Day`." - step = convert(Day, step) - else - # We need datetime to be compatible with the step. - msg = "Cannot add `TimePeriod` `step` to `Date` `zero_time`. Converting `zero_time` to `DateTime`." - zero_time = convert(DateTime, zero_time) - end - return zero_time, step, :success, msg - elseif zero_time isa Dates.Time && step isa Dates.DatePeriod - # Convert step to Hour if possible. This will fail for - # isa(step, Month) - msg = "Cannot add `DatePeriod` `step` to `Time` `zero_time`. Converting `step` to `Hour`." 
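-        # (Editor's aside, not in the original source: the incompatibility
-        # handled here is easy to check with the `Dates` stdlib directly;
-        # `applicable` probes whether a `+` method exists:
-        #
-        #     applicable(+, Date(2020, 1, 1), Hour(12))   # false
-        #     convert(Day, Hour(24))                      # Day(1)
-        #     DateTime(Date(2020, 1, 1)) + Hour(12)       # 2020-01-01T12:00:00
-        #
-        # Converting a variable-length period fails, e.g. `convert(Hour,
-        # Month(1))` throws, which is why the `Month` case noted above
-        # cannot be repaired.)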
- step = convert(Hour, step) - return zero_time, step, :success, msg - else - return zero_time, step, :error, msg - end -end - -function MMI.fit(model::UnivariateTimeTypeToContinuous, verbosity::Int, X) - if model.zero_time !== nothing - min_dt = model.zero_time - step = model.step - # Check zero_time is compatible with X - example = first(X) - try - X - min_dt - catch err - if err isa MethodError - @warn "`$(typeof(min_dt))` `zero_time` is not compatible with `$(eltype(X))` vector. Attempting to convert `zero_time`." - min_dt = convert(eltype(X), min_dt) - else - throw(err) - end - end - else - min_dt = minimum(X) - step = model.step - message = "" - try - min_dt + step - catch err - if err isa MethodError - min_dt, step, status, message = _fix_zero_time_step(min_dt, step) - if status === :error - # Unable to resolve, rethrow original error. - throw(err) - end - else - throw(err) - end - end - isempty(message) || @warn message - end - cache = nothing - report = NamedTuple() - fitresult = (min_dt, step) - return fitresult, cache, report -end - -function MMI.transform(model::UnivariateTimeTypeToContinuous, fitresult, X) - min_dt, step = fitresult - if typeof(min_dt) ≠ eltype(X) - # Cannot run if eltype in transform differs from zero_time from fit. - throw(ArgumentError("Different `TimeType` encountered during `transform` than expected from `fit`. Found `$(eltype(X))`, expected `$(typeof(min_dt))`")) - end - # Set the size of a single step. - next_time = min_dt + step - if next_time == min_dt - # Time type loops if step is a multiple of Hour(24), so calculate the - # number of multiples, then re-scale to Hour(12) and adjust delta to match original. - m = step / Dates.Hour(12) - delta = m * ( - Float64(Dates.value(min_dt + Dates.Hour(12)) - Dates.value(min_dt))) - else - delta = Float64(Dates.value(min_dt + step) - Dates.value(min_dt)) - end - return @. Float64(Dates.value(X - min_dt)) / delta -end - - -# # UNIVARIATE STANDARDIZATION - -""" - UnivariateStandardizer() - -Transformer type for standardizing (whitening) single variable data. - -This model may be deprecated in the future. Consider using -[`Standardizer`](@ref), which handles both tabular *and* univariate data. - -""" -mutable struct UnivariateStandardizer <: Unsupervised end - -function MMI.fit(transformer::UnivariateStandardizer, verbosity::Int, - v::AbstractVector{T}) where T<:Real - stdv = std(v) - stdv > eps(typeof(stdv)) || - @warn "Extremely small standard deviation encountered in standardization." 
- fitresult = (mean(v), stdv) - cache = nothing - report = NamedTuple() - return fitresult, cache, report -end - -MMI.fitted_params(::UnivariateStandardizer, fitresult) = - (mean=fitresult[1], std=fitresult[2]) - - -# for transforming single value: -function MMI.transform(transformer::UnivariateStandardizer, fitresult, x::Real) - mu, sigma = fitresult - return (x - mu)/sigma -end - -# for transforming vector: -MMI.transform(transformer::UnivariateStandardizer, fitresult, v) = - [transform(transformer, fitresult, x) for x in v] - -# for single values: -function MMI.inverse_transform(transformer::UnivariateStandardizer, fitresult, y::Real) - mu, sigma = fitresult - return mu + y*sigma -end - -# for vectors: -MMI.inverse_transform(transformer::UnivariateStandardizer, fitresult, w) = - [inverse_transform(transformer, fitresult, y) for y in w] - - -# # STANDARDIZATION OF ORDINAL FEATURES OF TABULAR DATA - -mutable struct Standardizer <: Unsupervised - # features to be standardized; empty means all - features::Union{AbstractVector{Symbol}, Function} - ignore::Bool # features to be ignored - ordered_factor::Bool - count::Bool -end - -# keyword constructor -function Standardizer( - ; - features::Union{AbstractVector{Symbol}, Function}=Symbol[], - ignore::Bool=false, - ordered_factor::Bool=false, - count::Bool=false -) - transformer = Standardizer(features, ignore, ordered_factor, count) - message = MMI.clean!(transformer) - isempty(message) || throw(ArgumentError(message)) - return transformer -end - -function MMI.clean!(transformer::Standardizer) - err = "" - if ( - typeof(transformer.features) <: AbstractVector{Symbol} && - isempty(transformer.features) && - transformer.ignore - ) - err *= "Features to be ignored must be specified in features field." - end - return err -end - -function MMI.fit(transformer::Standardizer, verbosity::Int, X) - - # if not a table, it must be an abstract vector, eltpye AbstractFloat: - is_univariate = !Tables.istable(X) - - # are we attempting to standardize Count or OrderedFactor? - is_invertible = !transformer.count && !transformer.ordered_factor - - # initialize fitresult: - fitresult_given_feature = LittleDict{Symbol,Tuple{AbstractFloat,AbstractFloat}}() - - # special univariate case: - if is_univariate - fitresult_given_feature[:unnamed] = - MMI.fit(UnivariateStandardizer(), verbosity - 1, X)[1] - return (is_univariate=true, - is_invertible=true, - fitresult_given_feature=fitresult_given_feature), - nothing, nothing - end - - all_features = Tables.schema(X).names - feature_scitypes = - collect(elscitype(selectcols(X, c)) for c in all_features) - scitypes = Vector{Type}([Continuous]) - transformer.ordered_factor && push!(scitypes, OrderedFactor) - transformer.count && push!(scitypes, Count) - AllowedScitype = Union{scitypes...} - - # determine indices of all_features to be transformed - if transformer.features isa AbstractVector{Symbol} - if isempty(transformer.features) - cols_to_fit = filter!(eachindex(all_features) |> collect) do j - feature_scitypes[j] <: AllowedScitype - end - else - !issubset(transformer.features, all_features) && verbosity > -1 && - @warn "Some specified features not present in table to be fit. 
" - cols_to_fit = filter!(eachindex(all_features) |> collect) do j - ifelse( - transformer.ignore, - !(all_features[j] in transformer.features) && - feature_scitypes[j] <: AllowedScitype, - (all_features[j] in transformer.features) && - feature_scitypes[j] <: AllowedScitype - ) - end - end - else - cols_to_fit = filter!(eachindex(all_features) |> collect) do j - ifelse( - transformer.ignore, - !(transformer.features(all_features[j])) && - feature_scitypes[j] <: AllowedScitype, - (transformer.features(all_features[j])) && - feature_scitypes[j] <: AllowedScitype - ) - end - end - - isempty(cols_to_fit) && verbosity > -1 && - @warn "No features to standarize." - - # fit each feature and add result to above dict - verbosity > 1 && @info "Features standarized: " - for j in cols_to_fit - col_data = if (feature_scitypes[j] <: OrderedFactor) - coerce(selectcols(X, j), Continuous) - else - selectcols(X, j) - end - col_fitresult, _, _ = - MMI.fit(UnivariateStandardizer(), verbosity - 1, col_data) - fitresult_given_feature[all_features[j]] = col_fitresult - verbosity > 1 && - @info " :$(all_features[j]) mu=$(col_fitresult[1]) "* - "sigma=$(col_fitresult[2])" - end - - fitresult = (is_univariate=false, is_invertible=is_invertible, - fitresult_given_feature=fitresult_given_feature) - cache = nothing - report = (features_fit=keys(fitresult_given_feature),) - - return fitresult, cache, report -end - -function MMI.fitted_params(::Standardizer, fitresult) - is_univariate, _, dic = fitresult - is_univariate && - return fitted_params(UnivariateStandardizer(), dic[:unnamed]) - features_fit = keys(dic) |> collect - zipped = map(ftr->dic[ftr], features_fit) - means, stds = zip(zipped...) |> collect - return (; features_fit, means, stds) -end - -MMI.transform(::Standardizer, fitresult, X) = - _standardize(transform, fitresult, X) - -function MMI.inverse_transform(::Standardizer, fitresult, X) - fitresult.is_invertible || - error("Inverse standardization is not supported when `count=true` "* - "or `ordered_factor=true` during fit. 
") - return _standardize(inverse_transform, fitresult, X) -end - -function _standardize(operation, fitresult, X) - - # `fitresult` is dict of column fitresults, keyed on feature names - is_univariate, _, fitresult_given_feature = fitresult - - if is_univariate - univariate_fitresult = fitresult_given_feature[:unnamed] - return operation(UnivariateStandardizer(), univariate_fitresult, X) - end - - features_to_be_transformed = keys(fitresult_given_feature) - - all_features = Tables.schema(X).names - - all(e -> e in all_features, features_to_be_transformed) || - error("Attempting to transform data with incompatible feature labels.") - - col_transformer = UnivariateStandardizer() - - cols = map(all_features) do ftr - ftr_data = selectcols(X, ftr) - if ftr in features_to_be_transformed - col_to_transform = coerce(ftr_data, Continuous) - operation(col_transformer, - fitresult_given_feature[ftr], - col_to_transform) - else - ftr_data - end - end - - named_cols = NamedTuple{all_features}(tuple(cols...)) - - return MMI.table(named_cols, prototype=X) -end - - -# # UNIVARIATE BOX-COX TRANSFORMATIONS - -function standardize(v) - map(v) do x - (x - mean(v))/std(v) - end -end - -function midpoints(v::AbstractVector{T}) where T <: Real - return [0.5*(v[i] + v[i + 1]) for i in 1:(length(v) -1)] -end - -function normality(v) - n = length(v) - v = standardize(convert(Vector{Float64}, v)) - # sort and replace with midpoints - v = midpoints(sort!(v)) - # find the (approximate) expected value of the size (n-1)-ordered statistics for - # standard normal: - d = Distributions.Normal(0,1) - w = map(collect(1:(n-1))/n) do x - quantile(d, x) - end - return cor(v, w) -end - -function boxcox(lambda, c, x::Real) - c + x >= 0 || throw(DomainError) - if lambda == 0.0 - c + x > 0 || throw(DomainError) - return log(c + x) - end - return ((c + x)^lambda - 1)/lambda -end - -boxcox(lambda, c, v::AbstractVector{T}) where T <: Real = - [boxcox(lambda, c, x) for x in v] - -@with_kw_noshow mutable struct UnivariateBoxCoxTransformer <: Unsupervised - n::Int = 171 # nbr values tried in optimizing exponent lambda - shift::Bool = false # whether to shift data away from zero -end - -function MMI.fit(transformer::UnivariateBoxCoxTransformer, verbosity::Int, - v::AbstractVector{T}) where T <: Real - - m = minimum(v) - m >= 0 || error("Cannot perform a Box-Cox transformation on negative data.") - - c = 0.0 # default - if transformer.shift - if m == 0 - c = 0.2*mean(v) - end - else - m != 0 || error("Zero value encountered in data being Box-Cox transformed.\n"* - "Consider calling `fit!` with `shift=true`.") - end - - lambdas = range(-0.4, stop=3, length=transformer.n) - scores = Float64[normality(boxcox(l, c, v)) for l in lambdas] - lambda = lambdas[argmax(scores)] - - return (lambda, c), nothing, NamedTuple() -end - -MMI.fitted_params(::UnivariateBoxCoxTransformer, fitresult) = - (λ=fitresult[1], c=fitresult[2]) - -# for X scalar or vector: -MMI.transform(transformer::UnivariateBoxCoxTransformer, fitresult, X) = - boxcox(fitresult..., X) - -# scalar case: -function MMI.inverse_transform(transformer::UnivariateBoxCoxTransformer, - fitresult, x::Real) - lambda, c = fitresult - if lambda == 0 - return exp(x) - c - else - return (lambda*x + 1)^(1/lambda) - c - end -end - -# vector case: -function MMI.inverse_transform(transformer::UnivariateBoxCoxTransformer, - fitresult, w::AbstractVector{T}) where T <: Real - return [inverse_transform(transformer, fitresult, y) for y in w] -end - - -# # ONE HOT ENCODING - -@with_kw_noshow mutable struct 
OneHotEncoder <: Unsupervised - features::Vector{Symbol} = Symbol[] - drop_last::Bool = false - ordered_factor::Bool = true - ignore::Bool = false -end - -# we store the categorical refs for each feature to be encoded and the -# corresponing feature labels generated (called -# "names"). `all_features` is stored to ensure no new features appear -# in new input data, causing potential name clashes. -struct OneHotEncoderResult <: MMI.MLJType - all_features::Vector{Symbol} # all feature labels - ref_name_pairs_given_feature::Dict{Symbol,Vector{Union{Pair{<:Unsigned,Symbol}, Pair{Missing, Symbol}}}} - fitted_levels_given_feature::Dict{Symbol, CategoricalArray} -end - -# join feature and level into new label without clashing with anything -# in all_features: -function compound_label(all_features, feature, level) - label = Symbol(string(feature, "__", level)) - # in the (rare) case subft is not a new feature label: - while label in all_features - label = Symbol(string(label,"_")) - end - return label -end - -function MMI.fit(transformer::OneHotEncoder, verbosity::Int, X) - - all_features = Tables.schema(X).names # a tuple not vector - - if isempty(transformer.features) - specified_features = collect(all_features) - else - if transformer.ignore - specified_features = filter(all_features |> collect) do ftr - !(ftr in transformer.features) - end - else - specified_features = transformer.features - end - end - - - ref_name_pairs_given_feature = Dict{Symbol,Vector{Pair{<:Unsigned,Symbol}}}() - - allowed_scitypes = ifelse( - transformer.ordered_factor, - Union{Missing, Finite}, - Union{Missing, Multiclass} - ) - fitted_levels_given_feature = Dict{Symbol, CategoricalArray}() - col_scitypes = schema(X).scitypes - # apply on each feature - for j in eachindex(all_features) - ftr = all_features[j] - col = MMI.selectcols(X,j) - T = col_scitypes[j] - if T <: allowed_scitypes && ftr in specified_features - ref_name_pairs_given_feature[ftr] = Pair{<:Unsigned,Symbol}[] - shift = transformer.drop_last ? 1 : 0 - levels = classes(col) - fitted_levels_given_feature[ftr] = levels - if verbosity > 0 - @info "Spawning $(length(levels)-shift) sub-features "* - "to one-hot encode feature :$ftr." 
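-            # (Editor's aside, not in the original source: `compound_label`
-            # defined above guards against clashes with existing column names:
-            #
-            #     compound_label((:grade, :height), :grade, "A")    # :grade__A
-            #     compound_label((:grade, :grade__A), :grade, "A")  # :grade__A_
-            #
-            # the trailing underscore avoiding the pre-existing `:grade__A`.)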
- end - for level in levels[1:end-shift] - ref = MMI.int(level) - name = compound_label(all_features, ftr, level) - push!(ref_name_pairs_given_feature[ftr], ref => name) - end - end - end - - fitresult = OneHotEncoderResult(collect(all_features), - ref_name_pairs_given_feature, - fitted_levels_given_feature) - - # get new feature names - d = ref_name_pairs_given_feature - new_features = Symbol[] - features_to_be_transformed = keys(d) - for ftr in all_features - if ftr in features_to_be_transformed - append!(new_features, last.(d[ftr])) - else - push!(new_features, ftr) - end - end - - report = (features_to_be_encoded= - collect(keys(ref_name_pairs_given_feature)), - new_features=new_features) - cache = nothing - - return fitresult, cache, report -end - -MMI.fitted_params(::OneHotEncoder, fitresult) = ( - all_features = fitresult.all_features, - fitted_levels_given_feature = fitresult.fitted_levels_given_feature, - ref_name_pairs_given_feature = fitresult.ref_name_pairs_given_feature, -) - -# If v=categorical('a', 'a', 'b', 'a', 'c') and MMI.int(v[1]) = ref -# then `_hot(v, ref) = [true, true, false, true, false]` -hot(v::AbstractVector{<:CategoricalValue}, ref) = map(v) do c - MMI.int(c) == ref -end - -function hot(col::AbstractVector{<:Union{Missing, CategoricalValue}}, ref) map(col) do c - if ismissing(ref) - missing - else - MMI.int(c) == ref - end -end -end - -function MMI.transform(transformer::OneHotEncoder, fitresult, X) - features = Tables.schema(X).names # tuple not vector - - d = fitresult.ref_name_pairs_given_feature - - # check the features match the fit result - all(e -> e in fitresult.all_features, features) || - error("Attempting to transform table with feature "* - "names not seen in fit. ") - new_features = Symbol[] - new_cols = [] # not Vector[] !! - features_to_be_transformed = keys(d) - for ftr in features - col = MMI.selectcols(X, ftr) - if ftr in features_to_be_transformed - Set(fitresult.fitted_levels_given_feature[ftr]) == - Set(classes(col)) || - error("Found category level mismatch in feature `$(ftr)`. "* - "Consider using `levels!` to ensure fitted and transforming "* - "features have the same category levels.") - append!(new_features, last.(d[ftr])) - pairs = d[ftr] - refs = first.(pairs) - names = last.(pairs) - cols_to_add = map(refs) do ref - if ismissing(ref) missing - else float.(hot(col, ref)) - end - end - append!(new_cols, cols_to_add) - else - push!(new_features, ftr) - push!(new_cols, col) - end - end - named_cols = NamedTuple{tuple(new_features...)}(tuple(new_cols)...) - return MMI.table(named_cols, prototype=X) -end - - -# # CONTINUOUS_ENCODING - -@with_kw_noshow mutable struct ContinuousEncoder <: Unsupervised - drop_last::Bool = false - one_hot_ordered_factors::Bool = false -end - -function MMI.fit(transformer::ContinuousEncoder, verbosity::Int, X) - - # what features can be converted and therefore kept? - s = schema(X) - features = s.names - scitypes = s.scitypes - Convertible = Union{Continuous, Finite, Count} - feature_scitype_tuples = zip(features, scitypes) |> collect - features_to_keep = - first.(filter(t -> last(t) <: Convertible, feature_scitype_tuples)) - features_to_be_dropped = setdiff(collect(features), features_to_keep) - - if verbosity > 0 - if !isempty(features_to_be_dropped) - @info "Some features cannot be replaced with "* - "`Continuous` features and will be dropped: "* - "$features_to_be_dropped. 
" - end - end - - # fit the one-hot encoder: - hot_encoder = - OneHotEncoder(ordered_factor=transformer.one_hot_ordered_factors, - drop_last=transformer.drop_last) - hot_fitresult, _, hot_report = MMI.fit(hot_encoder, verbosity - 1, X) - - new_features = setdiff(hot_report.new_features, features_to_be_dropped) - - fitresult = (features_to_keep=features_to_keep, - one_hot_encoder=hot_encoder, - one_hot_encoder_fitresult=hot_fitresult) - - # generate the report: - report = (features_to_keep=features_to_keep, - new_features=new_features) - - cache = nothing - - return fitresult, cache, report - -end - -MMI.fitted_params(::ContinuousEncoder, fitresult) = fitresult - -function MMI.transform(transformer::ContinuousEncoder, fitresult, X) - - features_to_keep, hot_encoder, hot_fitresult = values(fitresult) - - # dump unseen or untransformable features: - if !issubset(features_to_keep, MMI.schema(X).names) - throw( - ArgumentError( - "Supplied frame does not admit previously selected features." - ) - ) - end - X0 = MMI.selectcols(X, features_to_keep) - - # one-hot encode: - X1 = transform(hot_encoder, hot_fitresult, X0) - - # convert remaining to continuous: - return coerce(X1, Count=>Continuous, OrderedFactor=>Continuous) - -end - - -# # INTERACTION TRANSFORMER - -@mlj_model mutable struct InteractionTransformer <: Static - order::Int = 2::(_ > 1) - features::Union{Nothing, Vector{Symbol}} = nothing::(_ !== nothing ? length(_) > 1 : true) -end - -infinite_scitype(col) = eltype(scitype(col)) <: Infinite - -actualfeatures(features::Nothing, table) = - filter(feature -> infinite_scitype(Tables.getcolumn(table, feature)), Tables.columnnames(table)) - -function actualfeatures(features::Vector{Symbol}, table) - diff = setdiff(features, Tables.columnnames(table)) - diff != [] && throw(ArgumentError(string("Column(s) ", join([x for x in diff], ", "), " are not in the dataset."))) - - for feature in features - infinite_scitype(Tables.getcolumn(table, feature)) || throw(ArgumentError("Column $feature's scitype is not Infinite.")) - end - return Tuple(features) -end - -interactions(columns, order::Int) = - collect(Iterators.flatten(combinations(columns, i) for i in 2:order)) - -interactions(columns, variables...) = - .*((Tables.getcolumn(columns, var) for var in variables)...) - -function MMI.transform(model::InteractionTransformer, _, X) - features = actualfeatures(model.features, X) - interactions_ = interactions(features, model.order) - interaction_features = Tuple(Symbol(join(inter, "_")) for inter in interactions_) - columns = Tables.Columns(X) - interaction_table = NamedTuple{interaction_features}([interactions(columns, inter...) 
for inter in interactions_])
-    return merge(Tables.columntable(X), interaction_table)
-end
-
-# # METADATA FOR ALL BUILT-IN TRANSFORMERS
-
-metadata_pkg.(
-    (
-        UnivariateStandardizer,
-        UnivariateDiscretizer,
-        Standardizer,
-        UnivariateBoxCoxTransformer,
-        UnivariateFillImputer,
-        OneHotEncoder,
-        FillImputer,
-        ContinuousEncoder,
-        UnivariateTimeTypeToContinuous,
-        InteractionTransformer
-    ),
-    package_name = "MLJModels",
-    package_uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7",
-    package_url = "https://github.com/JuliaAI/MLJModels.jl",
-    is_pure_julia = true,
-    package_license = "MIT")
-
-metadata_model(UnivariateFillImputer,
-    input_scitype = Union{AbstractVector{<:Union{Continuous,Missing}},
-                          AbstractVector{<:Union{Count,Missing}},
-                          AbstractVector{<:Union{Finite,Missing}}},
-    output_scitype = Union{AbstractVector{<:Continuous},
-                           AbstractVector{<:Count},
-                           AbstractVector{<:Finite}},
-    human_name = "single variable fill imputer",
-    load_path = "MLJModels.UnivariateFillImputer")
-
-metadata_model(FillImputer,
-    input_scitype = Table,
-    output_scitype = Table,
-    load_path = "MLJModels.FillImputer")
-
-metadata_model(UnivariateDiscretizer,
-    input_scitype = AbstractVector{<:Continuous},
-    output_scitype = AbstractVector{<:OrderedFactor},
-    human_name = "single variable discretizer",
-    load_path = "MLJModels.UnivariateDiscretizer")
-
-metadata_model(UnivariateStandardizer,
-    input_scitype = AbstractVector{<:Infinite},
-    output_scitype = AbstractVector{Continuous},
-    human_name = "single variable standardizer",
-    load_path = "MLJModels.UnivariateStandardizer")
-
-metadata_model(Standardizer,
-    input_scitype = Union{Table, AbstractVector{<:Continuous}},
-    output_scitype = Union{Table, AbstractVector{<:Continuous}},
-    load_path = "MLJModels.Standardizer")
-
-metadata_model(UnivariateBoxCoxTransformer,
-    input_scitype = AbstractVector{Continuous},
-    output_scitype = AbstractVector{Continuous},
-    human_name = "single variable Box-Cox transformer",
-    load_path = "MLJModels.UnivariateBoxCoxTransformer")
-
-metadata_model(OneHotEncoder,
-    input_scitype = Table,
-    output_scitype = Table,
-    human_name = "one-hot encoder",
-    load_path = "MLJModels.OneHotEncoder")
-
-metadata_model(ContinuousEncoder,
-    input_scitype = Table,
-    output_scitype = Table(Continuous),
-    load_path = "MLJModels.ContinuousEncoder")
-
-metadata_model(UnivariateTimeTypeToContinuous,
-    input_scitype = AbstractVector{<:ScientificTimeType},
-    output_scitype = AbstractVector{Continuous},
-    human_name = "single variable transformer that creates "*
-        "continuous representations of temporally typed data",
-    load_path = "MLJModels.UnivariateTimeTypeToContinuous")
-
-metadata_model(InteractionTransformer,
-    input_scitype = Tuple{Table},
-    output_scitype = Table,
-    human_name = "interaction transformer",
-    load_path = "MLJModels.InteractionTransformer")
-
-# # DOC STRINGS
-
-# The following document strings comply with the MLJ standard.
-
-"""
-$(MLJModelInterface.doc_header(UnivariateFillImputer))
-
-Use this model to impute `missing` values in a vector with a fixed
-value learned from the non-missing values of the training vector.
-
-For imputing missing values in tabular data, use [`FillImputer`](@ref)
-instead.
- - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, x) - -where - -- `x`: any abstract vector with element scitype `Union{Missing, T}` - where `T` is a subtype of `Continuous`, `Multiclass`, - `OrderedFactor` or `Count`; check scitype using `scitype(x)` - -Train the machine using `fit!(mach, rows=...)`. - - -# Hyper-parameters - -- `continuous_fill`: function or other callable to determine value to - be imputed in the case of `Continuous` (abstract float) data; - default is to apply `median` after skipping `missing` values - -- `count_fill`: function or other callable to determine value to be - imputed in the case of `Count` (integer) data; default is to apply - rounded `median` after skipping `missing` values - -- `finite_fill`: function or other callable to determine value to be - imputed in the case of `Multiclass` or `OrderedFactor` data - (categorical vectors); default is to apply `mode` after skipping - `missing` values - - -# Operations - -- `transform(mach, xnew)`: return `xnew` with missing values imputed - with the fill values learned when fitting `mach` - - -# Fitted parameters - -The fields of `fitted_params(mach)` are: - -- `filler`: the fill value to be imputed in all new data - - -# Examples - -``` -using MLJ -imputer = UnivariateFillImputer() - -x_continuous = [1.0, 2.0, missing, 3.0] -x_multiclass = coerce(["y", "n", "y", missing, "y"], Multiclass) -x_count = [1, 1, 1, 2, missing, 3, 3] - -mach = machine(imputer, x_continuous) -fit!(mach) - -julia> fitted_params(mach) -(filler = 2.0,) - -julia> transform(mach, [missing, missing, 101.0]) -3-element Vector{Float64}: - 2.0 - 2.0 - 101.0 - -mach2 = machine(imputer, x_multiclass) |> fit! - -julia> transform(mach2, x_multiclass) -5-element CategoricalArray{String,1,UInt32}: - "y" - "n" - "y" - "y" - "y" - -mach3 = machine(imputer, x_count) |> fit! - -julia> transform(mach3, [missing, missing, 5]) -3-element Vector{Int64}: - 2 - 2 - 5 -``` - -For imputing tabular data, use [`FillImputer`](@ref). - -""" -UnivariateFillImputer - -""" -$(MLJModelInterface.doc_header(FillImputer)) - -Use this model to impute `missing` values in tabular data. A fixed -"filler" value is learned from the training data, one for each column -of the table. - -For imputing missing values in a vector, use -[`UnivariateFillImputer`](@ref) instead. - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, X) - -where - -- `X`: any table of input features (eg, a `DataFrame`) whose columns - each have element scitypes `Union{Missing, T}`, where `T` is a - subtype of `Continuous`, `Multiclass`, `OrderedFactor` or - `Count`. Check scitypes with `schema(X)`. - -Train the machine using `fit!(mach, rows=...)`. - - -# Hyper-parameters - -- `features`: a vector of names of features (symbols) for which - imputation is to be attempted; default is empty, which is - interpreted as "impute all". 
-
-- `continuous_fill`: function or other callable to determine value to
-  be imputed in the case of `Continuous` (abstract float) data; default is to apply
-  `median` after skipping `missing` values
-
-- `count_fill`: function or other callable to determine value to
-  be imputed in the case of `Count` (integer) data; default is to apply
-  rounded `median` after skipping `missing` values
-
-- `finite_fill`: function or other callable to determine value to be
-  imputed in the case of `Multiclass` or `OrderedFactor` data
-  (categorical vectors); default is to apply `mode` after skipping `missing` values
-
-
-# Operations
-
-- `transform(mach, Xnew)`: return `Xnew` with missing values imputed with
-  the fill values learned when fitting `mach`
-
-
-# Fitted parameters
-
-The fields of `fitted_params(mach)` are:
-
-- `features_seen_in_fit`: the names of features (columns) encountered
-  during training
-
-- `univariate_transformer`: the univariate model applied to determine
-  the fillers (its fields contain the functions defining the filler computations)
-
-- `filler_given_feature`: dictionary of filler values, keyed on
-  feature (column) names
-
-
-# Examples
-
-```
-using MLJ
-imputer = FillImputer()
-
-X = (a = [1.0, 2.0, missing, 3.0, missing],
-     b = coerce(["y", "n", "y", missing, "y"], Multiclass),
-     c = [1, 1, 2, missing, 3])
-
-julia> schema(X)
-┌───────┬───────────────────────────────┐
-│ names │ scitypes                      │
-├───────┼───────────────────────────────┤
-│ a     │ Union{Missing, Continuous}    │
-│ b     │ Union{Missing, Multiclass{2}} │
-│ c     │ Union{Missing, Count}         │
-└───────┴───────────────────────────────┘
-
-mach = machine(imputer, X)
-fit!(mach)
-
-julia> fitted_params(mach).filler_given_feature
-Dict{Symbol, Any} with 3 entries:
-  :a => 2.0
-  :b => "y"
-  :c => 2
-
-julia> transform(mach, X)
-(a = [1.0, 2.0, 2.0, 3.0, 2.0],
- b = CategoricalValue{String, UInt32}["y", "n", "y", "y", "y"],
- c = [1, 1, 2, 2, 3],)
-```
-
-See also [`UnivariateFillImputer`](@ref).
-
-"""
-FillImputer
-
-"""
-$(MLJModelInterface.doc_header(Standardizer))
-
-Use this model to standardize (whiten) a `Continuous` vector, or
-relevant columns of a table. The rescalings applied by this
-transformer to new data are always those learned during the training
-phase, which are generally different from what would actually
-standardize the new data.
-
-
-# Training data
-
-In MLJ or MLJBase, bind an instance `model` to data with
-
-    mach = machine(model, X)
-
-where
-
-- `X`: any Tables.jl compatible table or any abstract vector with
-  `Continuous` element scitype (any abstract float vector). Only
-  features in a table with `Continuous` scitype can be standardized;
-  check column scitypes with `schema(X)`.
-
-Train the machine using `fit!(mach, rows=...)`.
-
-
-# Hyper-parameters
-
-- `features`: one of the following, with the behavior indicated below:
-
-  - `[]` (empty, the default): standardize all features (columns)
-    having `Continuous` element scitype
-
-  - non-empty vector of feature names (symbols): standardize only the
-    `Continuous` features in the vector (if `ignore=false`) or
-    `Continuous` features *not* named in the vector (`ignore=true`).
-
-  - function or other callable: standardize a feature if the callable
-    returns `true` on its name.
For example, `Standardizer(features = - name -> name in [:x1, :x3], ignore = true, count=true)` has the - same effect as `Standardizer(features = [:x1, :x3], ignore = true, - count=true)`, namely to standardize all `Continuous` and `Count` - features, with the exception of `:x1` and `:x3`. - - Note this behavior is further modified if the `ordered_factor` or `count` flags - are set to `true`; see below - -- `ignore=false`: whether to ignore or standardize specified `features`, as - explained above - -- `ordered_factor=false`: if `true`, standardize any `OrderedFactor` - feature wherever a `Continuous` feature would be standardized, as - described above - -- `count=false`: if `true`, standardize any `Count` feature wherever a - `Continuous` feature would be standardized, as described above - - -# Operations - -- `transform(mach, Xnew)`: return `Xnew` with relevant features - standardized according to the rescalings learned during fitting of - `mach`. - -- `inverse_transform(mach, Z)`: apply the inverse transformation to - `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is - approximately the same as `Xnew`; unavailable if `ordered_factor` or - `count` flags were set to `true`. - - -# Fitted parameters - -The fields of `fitted_params(mach)` are: - -- `features_fit` - the names of features that will be standardized - -- `means` - the corresponding untransformed mean values - -- `stds` - the corresponding untransformed standard deviations - - -# Report - -The fields of `report(mach)` are: - -- `features_fit`: the names of features that will be standardized - - -# Examples - -``` -using MLJ - -X = (ordinal1 = [1, 2, 3], - ordinal2 = coerce([:x, :y, :x], OrderedFactor), - ordinal3 = [10.0, 20.0, 30.0], - ordinal4 = [-20.0, -30.0, -40.0], - nominal = coerce(["Your father", "he", "is"], Multiclass)); - -julia> schema(X) -┌──────────┬──────────────────┐ -│ names │ scitypes │ -├──────────┼──────────────────┤ -│ ordinal1 │ Count │ -│ ordinal2 │ OrderedFactor{2} │ -│ ordinal3 │ Continuous │ -│ ordinal4 │ Continuous │ -│ nominal │ Multiclass{3} │ -└──────────┴──────────────────┘ - -stand1 = Standardizer(); - -julia> transform(fit!(machine(stand1, X)), X) -(ordinal1 = [1, 2, 3], - ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x], - ordinal3 = [-1.0, 0.0, 1.0], - ordinal4 = [1.0, 0.0, -1.0], - nominal = CategoricalValue{String,UInt32}["Your father", "he", "is"],) - -stand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true); - -julia> transform(fit!(machine(stand2, X)), X) -(ordinal1 = [-1.0, 0.0, 1.0], - ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x], - ordinal3 = [10.0, 20.0, 30.0], - ordinal4 = [1.0, 0.0, -1.0], - nominal = CategoricalValue{String,UInt32}["Your father", "he", "is"],) -``` - -See also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref). -""" -Standardizer - - -""" -$(MLJModelInterface.doc_header(UnivariateDiscretizer)) - -Discretization converts a `Continuous` vector into an `OrderedFactor` -vector. In particular, the output is a `CategoricalVector` (whose -reference type is optimized). - -The transformation is chosen so that the vector on which the -transformer is fit has, in transformed form, an approximately uniform -distribution of values. Specifically, if `n_classes` is the level of -discretization, then `2*n_classes - 1` ordered quantiles are computed, -the odd quantiles being used for transforming (discretization) and the -even quantiles for inverse transforming. 
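As an aside (an editor's sketch, not part of the original docstring), the boundary arithmetic just described takes only a few standalone lines; the quantile values below are invented, and `searchsortedfirst` reproduces the linear scan in `transform_to_int` defined earlier:

```
# With n_classes = 3 there are two odd quantiles, acting as bin boundaries:
odd_quantiles = [0.5, 1.5]       # invented values

# Class index = number of boundaries strictly below r, plus one:
bin_index(r) = searchsortedfirst(odd_quantiles, r)

@assert [bin_index(r) for r in (0.3, 1.0, 2.0)] == [1, 2, 3]
```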
-
-
-# Training data
-
-In MLJ or MLJBase, bind an instance `model` to data with
-
-    mach = machine(model, x)
-
-where
-
-- `x`: any abstract vector with `Continuous` element scitype; check
-  scitype with `scitype(x)`.
-
-Train the machine using `fit!(mach, rows=...)`.
-
-
-# Hyper-parameters
-
-- `n_classes`: number of discrete classes in the output
-
-
-# Operations
-
-- `transform(mach, xnew)`: discretize `xnew` according to the
-  discretization learned when fitting `mach`
-
-- `inverse_transform(mach, z)`: attempt to reconstruct from `z` a
-  vector that transforms to give `z`
-
-
-# Fitted parameters
-
-The fields of `fitted_params(mach).fitresult` include:
-
-- `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)
-
-- `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)
-
-
-# Example
-
-```
-using MLJ
-using Random
-Random.seed!(123)
-
-discretizer = UnivariateDiscretizer(n_classes=100)
-mach = machine(discretizer, randn(1000))
-fit!(mach)
-
-julia> x = rand(5)
-5-element Vector{Float64}:
- 0.8585244609846809
- 0.37541692370451396
- 0.6767070590395461
- 0.9208844241267105
- 0.7064611415680901
-
-julia> z = transform(mach, x)
-5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:
- 0x52
- 0x42
- 0x4d
- 0x54
- 0x4e
-
-x_approx = inverse_transform(mach, z)
-julia> x - x_approx
-5-element Vector{Float64}:
- 0.008224506144777322
- 0.012731354778359405
- 0.0056265330571125816
- 0.005738175684445124
- 0.006835652575801987
-```
-
-"""
-UnivariateDiscretizer
-
-
-"""
-$(MLJModelInterface.doc_header(UnivariateBoxCoxTransformer))
-
-Box-Cox transformations attempt to make data look more normally
-distributed. This can improve performance and assist in the
-interpretation of models which suppose that data is
-generated by a normal distribution.
-
-A Box-Cox transformation (with shift) is of the form
-
-    x -> ((x + c)^λ - 1)/λ
-
-for some constant `c` and real `λ`, unless `λ = 0`, in which
-case the above is replaced with
-
-    x -> log(x + c)
-
-Given user-specified hyper-parameters `n::Integer` and `shift::Bool`,
-the present implementation learns the parameters `c` and `λ` from the
-training data as follows: If `shift=true` and zeros are encountered in
-the data, then `c` is set to `0.2` times the data mean. If there are
-no zeros, then no shift is applied. Finally, `n` different values of `λ`
-between `-0.4` and `3` are considered, with `λ` fixed to the value
-maximizing normality of the transformed data.
-
-*Reference:* [Wikipedia entry for power
- transform](https://en.wikipedia.org/wiki/Power_transform).
-
-
-# Training data
-
-In MLJ or MLJBase, bind an instance `model` to data with
-
-    mach = machine(model, x)
-
-where
-
-- `x`: any abstract vector with element scitype `Continuous`; check
-  the scitype with `scitype(x)`
-
-Train the machine using `fit!(mach, rows=...)`.
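A worked instance of the formulas above may help (an editor's sketch, not part of the original docstring; the parameter values are invented):

```
boxcox(λ, c, x) = λ == 0 ? log(c + x) : ((c + x)^λ - 1)/λ
invboxcox(λ, c, y) = λ == 0 ? exp(y) - c : (λ*y + 1)^(1/λ) - c

@assert boxcox(0.5, 0, 4.0) == 2.0       # (√4 - 1)/0.5
@assert invboxcox(0.5, 0, 2.0) == 4.0    # round trip
@assert boxcox(0.0, 0, exp(1.0)) ≈ 1.0   # log branch at λ = 0
```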
-
-
-# Hyper-parameters
-
-- `n=171`: number of values of the exponent `λ` to try
-
-- `shift=false`: whether to include a preliminary constant translation
-  in transformations, in the presence of zeros
-
-
-# Operations
-
-- `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`
-
-- `inverse_transform(mach, z)`: reconstruct the vector `x` whose
-  transformation learned by `mach` is `z`
-
-
-# Fitted parameters
-
-The fields of `fitted_params(mach)` are:
-
-- `λ`: the learned Box-Cox exponent
-
-- `c`: the learned shift
-
-
-# Examples
-
-```
-using MLJ
-using UnicodePlots
-using Random
-Random.seed!(123)
-
-transf = UnivariateBoxCoxTransformer()
-
-x = randn(1000).^2
-
-mach = machine(transf, x)
-fit!(mach)
-
-z = transform(mach, x)
-
-julia> histogram(x)
-                ┌                                        ┐
-   [ 0.0,  2.0) ┤███████████████████████████████████  848
-   [ 2.0,  4.0) ┤████▌ 109
-   [ 4.0,  6.0) ┤█▍ 33
-   [ 6.0,  8.0) ┤▍ 7
-   [ 8.0, 10.0) ┤▏ 2
-   [10.0, 12.0) ┤  0
-   [12.0, 14.0) ┤▏ 1
-                └                                        ┘
-                                 Frequency
-
-julia> histogram(z)
-                ┌                                        ┐
-   [-5.0, -4.0) ┤█▎ 8
-   [-4.0, -3.0) ┤████████▊ 64
-   [-3.0, -2.0) ┤█████████████████████▊ 159
-   [-2.0, -1.0) ┤█████████████████████████████▊ 216
-   [-1.0,  0.0) ┤███████████████████████████████████  254
-   [ 0.0,  1.0) ┤█████████████████████████▊ 188
-   [ 1.0,  2.0) ┤████████████▍ 90
-   [ 2.0,  3.0) ┤██▊ 20
-   [ 3.0,  4.0) ┤▎ 1
-                └                                        ┘
-                                 Frequency
-
-```
-
-"""
-UnivariateBoxCoxTransformer
-
-
-"""
-$(MLJModelInterface.doc_header(OneHotEncoder))
-
-Use this model to one-hot encode the `Multiclass` and `OrderedFactor`
-features (columns) of some table, leaving other columns unchanged.
-
-New data to be transformed may lack features present in the fit data,
-but no *new* features can be present.
-
-**Warning:** This transformer assumes that `levels(col)` for any
-`Multiclass` or `OrderedFactor` column, `col`, is the same for
-training data and new data to be transformed.
-
-To ensure *all* features are transformed into `Continuous` features, or
-dropped, use [`ContinuousEncoder`](@ref) instead.
-
-
-# Training data
-
-In MLJ or MLJBase, bind an instance `model` to data with
-
-    mach = machine(model, X)
-
-where
-
-- `X`: any Tables.jl compatible table. Columns can be of mixed type
-  but only those with element scitype `Multiclass` or `OrderedFactor`
-  can be encoded. Check column scitypes with `schema(X)`.
-
-Train the machine using `fit!(mach, rows=...)`.
-
-
-# Hyper-parameters
-
-- `features`: a vector of symbols (column names). If empty (default)
-  then all `Multiclass` and `OrderedFactor` features are
-  encoded. Otherwise, encoding is further restricted to the specified
-  features (`ignore=false`) or the unspecified features
-  (`ignore=true`). This default behavior can be modified by the
-  `ordered_factor` flag.
-
-- `ordered_factor=true`: when `false`, `OrderedFactor` features are
-  universally excluded from encoding
-
-- `drop_last=false`: whether to drop the column corresponding to the
-  final class of encoded features. For example, a three-class feature
-  is spawned into three new features if `drop_last=false`, but just
-  two features otherwise.
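Under the hood, each encoded feature is expanded into one float column per retained level, as in the `hot` helper defined earlier. A minimal sketch, assuming CategoricalArrays is available (`levelcode` standing in for `MMI.int`):

```
using CategoricalArrays

v = categorical(['a', 'a', 'b', 'a', 'c'])
ref = levelcode(v[1])                    # internal code of level 'a'
onehot_a = float.([levelcode(c) == ref for c in v])
@assert onehot_a == [1.0, 1.0, 0.0, 1.0, 0.0]
```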
-
-
-# Fitted parameters
-
-The fields of `fitted_params(mach)` are:
-
-- `all_features`: names of all features encountered in training
-
-- `fitted_levels_given_feature`: dictionary of the levels associated
-  with each feature encoded, keyed on the feature name
-
-- `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such
-  as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl
-  reference integer representing a level, and `ftr` the corresponding
-  new feature name; the dictionary is keyed on the names of features that
-  are encoded
-
-
-# Report
-
-The fields of `report(mach)` are:
-
-- `features_to_be_encoded`: names of input features to be encoded
-
-- `new_features`: names of all output features
-
-
-# Example
-
-```
-using MLJ
-
-X = (name=categorical(["Danesh", "Lee", "Mary", "John"]),
-     grade=categorical(["A", "B", "A", "C"], ordered=true),
-     height=[1.85, 1.67, 1.5, 1.67],
-     n_devices=[3, 2, 4, 3])
-
-julia> schema(X)
-┌───────────┬──────────────────┐
-│ names     │ scitypes         │
-├───────────┼──────────────────┤
-│ name      │ Multiclass{4}    │
-│ grade     │ OrderedFactor{3} │
-│ height    │ Continuous       │
-│ n_devices │ Count            │
-└───────────┴──────────────────┘
-
-hot = OneHotEncoder(drop_last=true)
-mach = fit!(machine(hot, X))
-W = transform(mach, X)
-
-julia> schema(W)
-┌──────────────┬────────────┐
-│ names        │ scitypes   │
-├──────────────┼────────────┤
-│ name__Danesh │ Continuous │
-│ name__John   │ Continuous │
-│ name__Lee    │ Continuous │
-│ grade__A     │ Continuous │
-│ grade__B     │ Continuous │
-│ height       │ Continuous │
-│ n_devices    │ Count      │
-└──────────────┴────────────┘
-```
-
-See also [`ContinuousEncoder`](@ref).
-
-"""
-OneHotEncoder
-
-
-"""
-$(MLJModelInterface.doc_header(ContinuousEncoder))
-
-Use this model to arrange all features (columns) of a table to have
-`Continuous` element scitype, by applying the following protocol to
-each feature `ftr`:
-
-- If `ftr` is already `Continuous` retain it.
-
-- If `ftr` is `Multiclass`, one-hot encode it.
-
-- If `ftr` is `OrderedFactor`, replace it with `coerce(ftr,
-  Continuous)` (vector of floating point integers), unless
-  `one_hot_ordered_factors=true` is specified, in which case one-hot
-  encode it.
-
-- If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.
-
-- If `ftr` has some other element scitype, or was not observed in
-  fitting the encoder, drop it from the table.
-
-**Warning:** This transformer assumes that `levels(col)` for any
-`Multiclass` or `OrderedFactor` column, `col`, is the same for
-training data and new data to be transformed.
-
-To selectively one-hot-encode categorical features (without dropping
-columns) use [`OneHotEncoder`](@ref) instead.
-
-
-# Training data
-
-In MLJ or MLJBase, bind an instance `model` to data with
-
-    mach = machine(model, X)
-
-where
-
-- `X`: any Tables.jl compatible table. Columns can be of mixed type
-  but only those with element scitype `Multiclass` or `OrderedFactor`
-  can be encoded. Check column scitypes with `schema(X)`.
-
-Train the machine using `fit!(mach, rows=...)`.
-
-
-# Hyper-parameters
-
-- `drop_last=false`: whether to drop the column corresponding to the
-  final class of one-hot encoded features. For example, a three-class
-  feature is spawned into three new features if `drop_last=false`, but
-  just two features otherwise.
- -- `one_hot_ordered_factors=false`: whether to one-hot any feature - with `OrderedFactor` element scitype, or to instead coerce it - directly to a (single) `Continuous` feature using the order - - -# Fitted parameters - -The fields of `fitted_params(mach)` are: - -- `features_to_keep`: names of features that will not be dropped from - the table - -- `one_hot_encoder`: the `OneHotEncoder` model instance for handling - the one-hot encoding - -- `one_hot_encoder_fitresult`: the fitted parameters of the - `OneHotEncoder` model - - -# Report - -- `features_to_keep`: names of input features that will not be dropped - from the table - -- `new_features`: names of all output features - - -# Example - -```julia -X = (name=categorical(["Danesh", "Lee", "Mary", "John"]), - grade=categorical(["A", "B", "A", "C"], ordered=true), - height=[1.85, 1.67, 1.5, 1.67], - n_devices=[3, 2, 4, 3], - comments=["the force", "be", "with you", "too"]) - -julia> schema(X) -┌───────────┬──────────────────┐ -│ names │ scitypes │ -├───────────┼──────────────────┤ -│ name │ Multiclass{4} │ -│ grade │ OrderedFactor{3} │ -│ height │ Continuous │ -│ n_devices │ Count │ -│ comments │ Textual │ -└───────────┴──────────────────┘ - -encoder = ContinuousEncoder(drop_last=true) -mach = fit!(machine(encoder, X)) -W = transform(mach, X) - -julia> schema(W) -┌──────────────┬────────────┐ -│ names │ scitypes │ -├──────────────┼────────────┤ -│ name__Danesh │ Continuous │ -│ name__John │ Continuous │ -│ name__Lee │ Continuous │ -│ grade │ Continuous │ -│ height │ Continuous │ -│ n_devices │ Continuous │ -└──────────────┴────────────┘ - -julia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features -1-element Vector{Symbol}: - :comments - -``` - -See also [`OneHotEncoder`](@ref) -""" -ContinuousEncoder - - -""" -$(MLJModelInterface.doc_header(UnivariateTimeTypeToContinuous)) - -Use this model to convert vectors with a `TimeType` element type to -vectors of `Float64` type (`Continuous` element scitype). - - -# Training data - -In MLJ or MLJBase, bind an instance `model` to data with - - mach = machine(model, x) - -where - -- `x`: any abstract vector whose element type is a subtype of - `Dates.TimeType` - -Train the machine using `fit!(mach, rows=...)`. - - -# Hyper-parameters - -- `zero_time`: the time that is to correspond to 0.0 under - transformations, with the type coinciding with the training data - element type. If unspecified, the earliest time encountered in - training is used. - -- `step::Period=Hour(24)`: time interval to correspond to one unit - under transformation - - -# Operations - -- `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit - - -# Fitted parameters - -`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` -actually used in transformations, which may differ from the -user-specified hyper-parameters. - - -# Example - -``` -using MLJ -using Dates - -x = [Date(2001, 1, 1) + Day(i) for i in 0:4] - -encoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1), - step=Week(1)) - -mach = machine(encoder, x) -fit!(mach) -julia> transform(mach, x) -5-element Vector{Float64}: - 52.285714285714285 - 52.42857142857143 - 52.57142857142857 - 52.714285714285715 - 52.857142 -``` - -""" -UnivariateTimeTypeToContinuous - -""" -$(MLJModelInterface.doc_header(InteractionTransformer)) - -Generates all polynomial interaction terms up to the given order for the subset of chosen -columns. 
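(Editor's sketch, not part of the original docstring: the column subsets come from `combinations`, as in the `interactions` helper defined earlier; Combinatorics.jl is assumed, as the implementation implies.)

```
using Combinatorics

cols = [:A, :B, :C]
collect(Iterators.flatten(combinations(cols, i) for i in 2:3))
# 4-element Vector{Vector{Symbol}}:
#  [:A, :B]
#  [:A, :C]
#  [:B, :C]
#  [:A, :B, :C]
# one new column per subset, filled with the elementwise product of its members
```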
Any column that contains elements with scitype `<:Infinite` is a valid basis to -generate interactions. If `features` is not specified, all such columns with scitype -`<:Infinite` in the table are used as a basis. - -In MLJ or MLJBase, you can transform features `X` with the single call - - transform(machine(model), X) - -See also the example below. - - -# Hyper-parameters - -- `order`: Maximum order of interactions to be generated. -- `features`: Restricts interations generation to those columns - -# Operations - -- `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` - using the hyper-parameters specified in `model`. - -# Example - -``` -using MLJ - -X = ( - A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - D = ["x₁", "x₂", "x₃"] -) -it = InteractionTransformer(order=3) -mach = machine(it) - -julia> transform(mach, X) -(A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - D = ["x₁", "x₂", "x₃"], - A_B = [4, 10, 18], - A_C = [7, 16, 27], - B_C = [28, 40, 54], - A_B_C = [28, 80, 162],) - -it = InteractionTransformer(order=2, features=[:A, :B]) -mach = machine(it) - -julia> transform(mach, X) -(A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - D = ["x₁", "x₂", "x₃"], - A_B = [4, 10, 18],) - -``` - -""" -InteractionTransformer diff --git a/test/GaussianProcesses.jl b/test/GaussianProcesses.jl deleted file mode 100755 index 172006b..0000000 --- a/test/GaussianProcesses.jl +++ /dev/null @@ -1,42 +0,0 @@ -module TestGaussianProcesses - -using MLJBase -using Test -using Random:seed! -import CategoricalArrays - -seed!(113355) - -X, y = @load_crabs - -# load code to be tested: -import MLJModels -import GaussianProcesses -using MLJModels.GaussianProcesses_ - -baregp = GPClassifier() - -# split the rows: -allrows = eachindex(y) -train, test = partition(allrows, 0.7, shuffle=true) - -fitresult, cache, report = - MLJBase.fit(baregp, 1, MLJBase.selectrows(X, train), y[train]) -yhat = predict(baregp, fitresult, MLJBase.selectrows(X, test)); - -@test sum(yhat .== y[test]) / length(y[test]) >= 0.7 # around 0.7 - -fitresult, cache, report = MLJBase.fit(baregp, 1, X, y) -yhat2 = predict(baregp, fitresult, MLJBase.selectrows(X, test)); - - -# gp = machine(baregp, X, y) -# fit!(gp) -# yhat2 = predict(gp, MLJBase.selectrows(X, test)) - -@test sum(yhat2 .== y[test]) / length(y[test]) >= 0.7 - -MLJModels.info_dict(baregp) - -end # module -true diff --git a/test/NaiveBayes.jl b/test/NaiveBayes.jl deleted file mode 100755 index fa0103c..0000000 --- a/test/NaiveBayes.jl +++ /dev/null @@ -1,112 +0,0 @@ -module TestNaiveBayes - -using Pkg -using MLJBase -using Test -import Random.seed! - -import MLJModels -import NaiveBayes - -using MLJModels.NaiveBayes_ -using CategoricalArrays - -## GAUSSIAN - -gaussian_classifier = GaussianNBClassifier() -MLJModels.info_dict(gaussian_classifier) - -# gaussian classifier takes continuous features -X, y = @load_iris - -train, test = partition(eachindex(y), 0.6) - -fitresultG, cacheG, reportG = fit(gaussian_classifier, 1, - selectrows(X, train), y[train]); - -gaussian_pred = predict(gaussian_classifier, fitresultG, selectrows(X, test)); - -yhat1 = gaussian_pred[1] -@test Set(classes(yhat1)) == Set(classes(y[1])) - -# test with linear data: -seed!(1234) -x1 = randn(3000); -x2 = randn(3000); -x3 = randn(3000); -X = (x1=x1, x2=x2, x3=x3); -ycont = x1 - x2 -2x3; -y = map(ycont) do η - η > 0 ? 
"go" : "stop" -end |> categorical; -train, test = partition(eachindex(y), 0.8); - -gaussian_classifier = GaussianNBClassifier() - -fitresultG, cacheG, reportG = MLJBase.fit(gaussian_classifier, 1, - selectrows(X, train), y[train]) - -gaussian_pred = MLJBase.predict_mode(gaussian_classifier, - fitresultG, selectrows(X, test)) - -@test sum(gaussian_pred .!= y[test])/length(y) < 0.05 - - -## MULTINOMIAL - -# first contrive some test data - -# some word counts in children's books about colours: -red = [2, 0, 1, 0, 1] -blue = [4, 1, 2, 3, 2] -green = [0, 2, 0, 6, 1] -X = (red=red, blue=blue, green=green) - -# gender of author: -y = categorical([:m, :f, :m, :f, :m]) -male = y[1] -female = y[2] - -# Note: The smoothing algorithm is to add to the training data, for -# each class observed, a row with every feature getting count of -# alpha. So smoothing also effects the class marginals (is this -# standard)? Only integer values of alpha allowed. - -# computing conditional probabilities by hand with Lagrangian -# smoothing (alpha=1): -red_given_m = 5/16 -blue_given_m = 9/16 -green_given_m = 2/16 -red_given_f = 1/15 -blue_given_f = 5/15 -green_given_f = 9/15 - -m_(red, blue, green) = - 4/7*(red_given_m^red)*(blue_given_m^blue)*(green_given_m^green) -f_(red, blue, green) = - 3/7*(red_given_f^red)*(blue_given_f^blue)*(green_given_f^green) -normalizer(red, blue, green) = m_(red, blue, green) + f_(red, blue, green) -m(a...) = m_(a...)/normalizer(a...) -f(a...) = f_(a...)/normalizer(a...) - -Xnew = (red=[1, 1], blue=[1, 2], green=[1, 3]) - -# prediction by hand: - -yhand =[MLJBase.UnivariateFinite([male, female], [m(1, 1, 1), f(1, 1, 1)]), - MLJBase.UnivariateFinite([male, female], [m(1, 2, 3), f(1, 2, 3)])] - -multinomial_classifier = MultinomialNBClassifier() -MLJModels.info_dict(multinomial_classifier) - -fitresultMLT, cacheMLT, reportMLT = - MLJBase.fit(multinomial_classifier, 1, X, y) - -yhat = MLJBase.predict(multinomial_classifier, fitresultMLT, Xnew) - -# see issue https://github.com/dfdx/NaiveBayes.jl/issues/42 -@test_broken pdf(yhand[1], :m) ≈ pdf(yhat[1], :m) -@test_broken pdf(yhand[1], :f) ≈ pdf(yhat[1], :f) - -end # module -true diff --git a/test/builtins/Transformers.jl b/test/builtins/Transformers.jl deleted file mode 100644 index 1a3550f..0000000 --- a/test/builtins/Transformers.jl +++ /dev/null @@ -1,647 +0,0 @@ -module TestTransformer - -using Test, MLJModels -using Tables, CategoricalArrays, Random -using ScientificTypes -using StatsBase -using Statistics -using StableRNGs -stable_rng = StableRNGs.StableRNG(123) -using Dates: DateTime, Date, Time, Day, Hour -import MLJBase - -_get(x) = CategoricalArrays.DataAPI.unwrap(x) - - -#### UNIVARIATE DISCRETIZATION #### - -@testset "U-Discr" begin - v = randn(10000) - t = UnivariateDiscretizer(n_classes=100); - result, = MLJBase.fit(t, 1, v) - w = MLJBase.transform(t, result, v) - bad_values = filter(v - MLJBase.inverse_transform(t, result, w)) do x - abs(x) > 0.05 - end - @test length(bad_values)/length(v) < 0.06 - - # scalars: - @test MLJBase.transform(t, result, v[42]) == w[42] - r = MLJBase.inverse_transform(t, result, w)[43] - @test MLJBase.inverse_transform(t, result, w[43]) ≈ r - - # test of permitted abuses of argument: - @test MLJBase.inverse_transform(t, result, _get(w[43])) ≈ r - @test MLJBase.inverse_transform(t, result, map(_get, w)) ≈ - MLJBase.inverse_transform(t, result, w) - - # all transformed vectors should have an identical pool (determined in - # call to fit): - v2 = v[1:3] - w2 = MLJBase.transform(t, result, v2) - @test 
levels(w2) == levels(w) - -end - -#### STANDARDIZER #### - -@testset begin "standardization" - - # UnivariateStandardizer: - stand = UnivariateStandardizer() - f, = MLJBase.fit(stand, 1, [0, 2, 4]) - @test round.(Int, MLJBase.transform(stand, f, [0,4,8])) == [-1.0,1.0,3.0] - @test round.(Int, MLJBase.inverse_transform(stand, f, [-1, 1, 3])) == - [0, 4, 8] - - N = 5 - rand_char = rand("abcefgh", N) - while length(unique(rand_char)) < 2 - rand_char = rand("abcefgh", N) - end - X = (OverallQual = rand(UInt8, N), - GrLivArea = rand(N), - Neighborhood = categorical(rand_char, ordered=true), - x1stFlrSF = sample(1:10, N, replace=false), - TotalBsmtSF = rand(N)) - - # introduce a field of type `Char`: - x1 = categorical(map(Char, (X.OverallQual |> collect))) - - X = (x1=x1, x2=X[2], x3=X[3], x4=X[4], x5=X[5]) - - stand = Standardizer() - f, = MLJBase.fit(stand, 1, X) - Xnew = MLJBase.transform(stand, f, X) - - # test inverse: - XX = MLJBase.inverse_transform(stand, f, Xnew) - @test MLJBase.schema(X) == MLJBase.schema(XX) - @test XX.x1 == X.x1 - @test XX.x2 ≈ X.x2 - @test XX.x3 == X.x3 - @test XX.x4 == X.x4 - @test XX.x5 ≈ X.x5 - - # test transformation: - @test Xnew[1] == X[1] - @test MLJBase.std(Xnew[2]) ≈ 1.0 - @test Xnew[3] == X[3] - @test Xnew[4] == X[4] - @test MLJBase.std(Xnew[5]) ≈ 1.0 - - # test feature specification (ignore=false): - stand.features = [:x1, :x5] - f, = MLJBase.fit(stand, 1, X) - Xnew = MLJBase.transform(stand, f, X) - @test issubset(Set(keys(f[3])), Set(Tables.schema(X).names[[5,]])) - Xt = MLJBase.transform(stand, f, X) - @test Xnew[1] == X[1] - @test Xnew[2] == X[2] - @test Xnew[3] == X[3] - @test Xnew[4] == X[4] - @test MLJBase.std(Xnew[5]) ≈ 1.0 - - # test on ignoring a feature, even if it's listed in the `features` - stand.ignore = true - f, = MLJBase.fit(stand, 1, X) - Xnew = MLJBase.transform(stand, f, X) - @test issubset(Set(keys(f[3])), Set(Tables.schema(X).names[[2,]])) - Xt = MLJBase.transform(stand, f, X) - @test Xnew[1] == X[1] - @test MLJBase.std(Xnew[2]) ≈ 1.0 - @test Xnew[3] == X[3] - @test Xnew[4] == X[4] - @test Xnew[5] == X[5] - - # test warnings about features not encountered in fit or no - # features need transforming: - stand = Standardizer(features=[:x1, :mickey_mouse]) - @test_logs( - (:warn, r"Some specified"), - (:warn, r"No features"), - MLJBase.fit(stand, 1, X) - ) - stand.ignore = true - @test_logs (:warn, r"Some specified") MLJBase.fit(stand, 1, X) - - # features must be specified if ignore=true - @test_throws ArgumentError Standardizer(ignore=true) - - # test count, ordered_factor options: - stand = Standardizer(features=[:x3, :x4], count=true, ordered_factor=true) - f, = MLJBase.fit(stand, 1, X) - Xnew = MLJBase.transform(stand, f, X) - @test issubset(Set(keys(f[3])), Set(Tables.schema(X).names[3:4,])) - Xt = MLJBase.transform(stand, f, X) - @test_throws Exception MLJBase.inverse_transform(stand, f, Xt) - - @test Xnew[1] == X[1] - @test Xnew[2] == X[2] - @test elscitype(X[3]) <: OrderedFactor - @test elscitype(Xnew[3]) <: Continuous - @test MLJBase.std(Xnew[3]) ≈ 1.0 - @test elscitype(X[4]) == Count - @test elscitype(Xnew[4]) <: Continuous - @test MLJBase.std(Xnew[4]) ≈ 1.0 - @test Xnew[5] == X[5] - - stand = Standardizer(features= x-> x == (:x2)) - f, = MLJBase.fit(stand, 1, X) - Xnew = MLJBase.transform(stand, f, X) - - @test Xnew[1] == X[1] - @test MLJBase.std(Xnew[2]) ≈ 1.0 - @test Xnew[3] == X[3] - @test Xnew[4] == X[4] - @test Xnew[5] == X[5] - - # univariate case - stand = Standardizer() - f, _, _ = MLJBase.fit(stand, 1, [0, 2, 4]) - 
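-    # (Editor's aside, not in the original test: fitting on [0, 2, 4] learns
-    # mean 2.0 and std 2.0, so under (x - mu)/sigma the vector [0, 4, 8] maps
-    # to [-1.0, 1.0, 3.0], the values asserted next and inverted just below.)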
@test round.(Int, MLJBase.transform(stand, f, [0,4,8])) == [-1.0,1.0,3.0] - fp = MLJBase.fitted_params(stand, f) - @test fp.mean ≈ 2.0 - @test fp.std ≈ 2.0 -end - -### TIMETYPE TO CONTINUOUS - -@testset "TimeTypeToContinuous" begin - let dt = [Date(2018, 6, 15) + Day(i) for i=0:10], - transformer = UnivariateTimeTypeToContinuous(; step=Day(1)) - fr, _, _ = MLJBase.fit(transformer, 1, dt) - @test fr == (Date(2018, 6, 15), Day(1)) - dt_continuous = MLJBase.transform(transformer, fr, dt) - @test all(dt_continuous .== Float64.(0:10)) - end - - let dt = [Date(2018, 6, 15) + Day(i) for i=0:10], - transformer = UnivariateTimeTypeToContinuous() - fr, _, _ = @test_logs( - (:warn, r"Cannot add `TimePeriod` `step`"), - MLJBase.fit(transformer, 1, dt) - ) - fr, _, _ = @test_logs (:warn, r"C") MLJBase.fit(transformer, 1, dt) - @test fr == (Date(2018, 6, 15), Day(1)) - dt_continuous = MLJBase.transform(transformer, fr, dt) - @test all(dt_continuous .== Float64.(0:10)) - end - - let dt = [Time(0, 0, 0) + Hour(i) for i=0:3:30], - transformer = UnivariateTimeTypeToContinuous(; - step = Hour(1), - zero_time = Time(7, 0, 0), - ) - fr, _, _ = MLJBase.fit(transformer, 1, dt) - @test fr == (Time(7, 0, 0), Hour(1)) - dt_continuous = MLJBase.transform(transformer, fr, dt) - ex = collect(0:3:30) .% 24 .- 7.0 - diff = map(dt_continuous .- ex) do d - mod(d, 24.0) - end - @test all(diff .≈ 0.0) - end - - let dt = [Time(0, 0, 0) + Hour(i) for i=0:3:30], - transformer = UnivariateTimeTypeToContinuous() - fr, _, _ = MLJBase.fit(transformer, 1, dt) - @test fr == (Time(0, 0, 0), Hour(24)) - dt_continuous = MLJBase.transform(transformer, fr, dt) - ex = collect(0:3:30) .% 24 ./ 24 - diff = map(dt_continuous .- ex) do d - mod(d, 1.0) - end - @test all(diff .≈ 0.0) - end - - # test log messages - let dt = [DateTime(2018, 6, 15) + Day(i) for i=0:10], - step=Hour(1), - zero_time=Date(2018, 6, 15), - transformer = @test_logs( - (:warn, "Cannot add `TimePeriod` `step` to `Date` `zero_time`. Converting `zero_time` to `DateTime`."), - UnivariateTimeTypeToContinuous(; - step=step, - zero_time=zero_time, - ) - ) - fr, _, _ = MLJBase.fit(transformer, 1, dt) - - @test fr == (zero_time, step) - dt_continuous = MLJBase.transform(transformer, fr, dt) - @test all(dt_continuous .== Float64.(0:10).*24) - end - - let dt = [Time(0, 0, 0) + Hour(i) for i=0:3:30], - zero_time=Time(0, 0, 0), - step=Day(1), - transformer = @test_logs( - (:warn, "Cannot add `DatePeriod` `step` to `Time` `zero_time`. 
Converting `step` to `Hour`."), - UnivariateTimeTypeToContinuous(; - step=step, - zero_time=zero_time, - ) - ) - fr, _, _ = MLJBase.fit(transformer, 1, dt) - - @test fr == (zero_time, convert(Hour, step)) - dt_continuous = MLJBase.transform(transformer, fr, dt) - ex = Float64.((0:3:30) .% 24)./24 - diff = map(dt_continuous .- ex) do d - mod(d, 1.0) - end - @test all(diff .≈ 0.0) - end - - let dt = [DateTime(2018, 6, 15) + Day(i) for i=0:10], - step=Day(1), - zero_time=Date(2018, 6, 15), - transformer = UnivariateTimeTypeToContinuous(; - step=step, - zero_time=zero_time, - ) - fr, _, _ = @test_logs( - (:warn, r"`Date"), - MLJBase.fit(transformer, 1, dt) - ) - - @test fr == (zero_time, step) - dt_continuous = MLJBase.transform(transformer, fr, dt) - @test all(dt_continuous .== Float64.(0:10)) - end -end - - -#### UNIVARIATE BOX COX TRANSFORMER #### - -@testset "U-boxcox" begin - # create skewed non-negative vector with a zero value: - Random.seed!(1551) - v = abs.(randn(1000)) - v = v .- minimum(v) - - t = UnivariateBoxCoxTransformer(shift=true) - f, = MLJBase.fit(t, 2, v) - - e = v - MLJBase.inverse_transform(t, f, MLJBase.transform(t, f, v)) - @test sum(abs, e) <= 5000*eps() - -end - - -#### ONE HOT ENCODER #### - -@testset "One-Hot" begin - X = (name = categorical(["Ben", "John", "Mary", "John"], ordered=true), - height = [1.85, 1.67, 1.5, 1.67], - favourite_number = categorical([7, 5, 10, 5]), - age = [23, 23, 14, 23]) - - t = OneHotEncoder() - f, _, report = @test_logs((:info, r"Spawning 3"), - (:info, r"Spawning 3"), MLJBase.fit(t, 1, X)) - - Xt = MLJBase.transform(t, f, X) - - @test Xt.name__John == float.([false, true, false, true]) - @test Xt.height == X.height - @test Xt.favourite_number__10 == float.([false, false, true, false]) - @test Xt.age == X.age - @test MLJBase.schema(Xt).names == (:name__Ben, :name__John, :name__Mary, - :height, :favourite_number__5, - :favourite_number__7, :favourite_number__10, - :age) - - @test report.new_features == collect(MLJBase.schema(Xt).names) - - # test that *entire* pool of categoricals is used in fit, including - # unseen levels: - f, = @test_logs((:info, r"Spawning 3"), (:info, r"Spawning 3"), - MLJBase.fit(t, 1, MLJBase.selectrows(X,1:2))) - Xtsmall = MLJBase.transform(t, f, X) - @test Xt == Xtsmall - - # test that transform can be applied to subset of the data: - @test MLJBase.transform(t, f, MLJBase.selectcols(X, [:name, :age])) == - MLJBase.selectcols(MLJBase.transform(t, f, X), - [:name__Ben, :name__John, :name__Mary, :age]) - - # test ignore - t = OneHotEncoder(features=[:name,], ignore=true) - f, = MLJBase.fit(t, 0, X) - Xt = MLJBase.transform(t, f, X) - @test MLJBase.schema(Xt).names == (:name, :height, :favourite_number__5, - :favourite_number__7, :favourite_number__10, - :age) - - # test exclusion of ordered factors: - t = OneHotEncoder(ordered_factor=false) - f, = MLJBase.fit(t, 0, X) - Xt = MLJBase.transform(t, f, X) - @test keys(Xt) == (:name, :height, :favourite_number__5, - :favourite_number__7, :favourite_number__10, :age) - - @test :name in Tables.schema(Xt).names - @test :favourite_number__5 in Tables.schema(Xt).names - @test MLJBase.schema(Xt).scitypes == (OrderedFactor{3}, Continuous, - Continuous, Continuous, - Continuous, Count) - - # test that one may not add new columns: - X = (name = categorical(["Ben", "John", "Mary", "John"], ordered=true), - height = [1.85, 1.67, 1.5, 1.67], - favourite_number = categorical([7, 5, 10, 5]), - age = [23, 23, 14, 23], - gender = categorical(['M', 'M', 'F', 'M'])) - @test_throws 
Exception MLJBase.transform(t, f, X) - - # test to throw exception when category level mismatch is found - X = (name = categorical(["Ben", "John", "Mary", "John"], ordered=true), - height = [1.85, 1.67, 1.5, 1.67], - favourite_number = categorical([7, 5, 10, 5]), - age = [23, 23, 14, 23]) - Xmiss = (name = categorical(["John", "Mary", "John"], ordered=true), - height = X.height, - favourite_number = X.favourite_number, - age = X.age) - t = OneHotEncoder() - f, = MLJBase.fit(t, 0, X) - @test_throws Exception MLJBase.transform(t, f, Xmiss) - - # test the work on missing values - X = (name = categorical(["Ben", "John", "Mary", "John", missing], ordered=true), - height = [1.85, 1.67, 1.5, 1.67, 1.56], - favourite_number = categorical([7, 5, 10, missing, 5]), - age = [23, 23, 14, 23, 21]) - - t = OneHotEncoder() - f, _, report = @test_logs((:info, r"Spawning 3"), - (:info, r"Spawning 3"), MLJBase.fit(t, 1, X)) - - Xt = MLJBase.transform(t, f, X) - - @test length(Xt.name__John) == 5 - @test collect(skipmissing(Xt.name__John)) == float.([false, true, false, true]) - @test ismissing(Xt.name__John[5]) - @test Xt.height == X.height - @test length(Xt.favourite_number__10) == 5 - @test collect(skipmissing(Xt.favourite_number__10)) == float.([false, false, true, false]) - @test ismissing(Xt.favourite_number__10[4]) - @test Xt.age == X.age - @test MLJBase.schema(Xt).names == (:name__Ben, :name__John, :name__Mary, - :height, :favourite_number__5, - :favourite_number__7, :favourite_number__10, - :age) - - @test report.new_features == collect(MLJBase.schema(Xt).names) - - # test the work on missing values with drop_last = true - - X = (name = categorical(["Ben", "John", "Mary", "John", missing], ordered=true), - height = [1.85, 1.67, 1.5, 1.67, 1.56], - favourite_number = categorical([7, 5, 10, missing, 5]), - age = [23, 23, 14, 23, 21]) - - t = OneHotEncoder(drop_last = true) - f, _, report = @test_logs((:info, r"Spawning 2"), - (:info, r"Spawning 2"), MLJBase.fit(t, 1, X)) - - Xt = MLJBase.transform(t, f, X) - - @test length(Xt.name__John) == 5 - @test collect(skipmissing(Xt.name__John)) == float.([false, true, false, true]) - @test ismissing(Xt.name__John[5]) - @test Xt.height == X.height - @test ismissing(Xt.favourite_number__5[4]) - @test collect(skipmissing(Xt.favourite_number__5)) == float.([false, true, false, true]) - @test ismissing(Xt.favourite_number__5[4]) - @test Xt.age == X.age - @test MLJBase.schema(Xt).names == (:name__Ben, :name__John, - :height, :favourite_number__5, - :favourite_number__7, - :age) - - @test_throws Exception Xt.favourite_number__10 - @test_throws Exception Xt.name__Mary - @test report.new_features == collect(MLJBase.schema(Xt).names) - - # Test when the first value is missing - X = (name=categorical([missing, "John", "Mary", "John"]),) - t = OneHotEncoder() - f, _, _ = MLJBase.fit(t, 0, X) - Xt = MLJBase.transform(t, f, X) - @test Xt.name__John[1] === Xt.name__Mary[1] === missing - @test Xt.name__John[2:end] == Union{Missing, Float64}[1.0, 0.0, 1.0] - @test Xt.name__Mary[2:end] == Union{Missing, Float64}[0.0, 1.0, 0.0] - -end - - -#### FILL IMPUTER ####' - -@testset "UnivariateFillImputer" begin - vpure = rand(stable_rng, 10) - v = vcat([missing, ], vpure) - filler = median(vpure) - imp = MLJModels.UnivariateFillImputer() - f, = MLJBase.fit(imp, 1, v) - vnew = [missing, 1.0, missing, 2.0, 3.0] - @test MLJBase.transform(imp, f, vnew) ≈ [filler, 1.0, filler, 2.0, 3.0] - - vpure = MLJBase.coerce(rand(stable_rng, "abc", 100), OrderedFactor); - v = vcat([missing, ], vpure) - 
filler = mode(vpure) - imp = MLJModels.UnivariateFillImputer() - f, = MLJBase.fit(imp, 1, v) - vnew = vcat([missing, ], vpure[end-10:end], [missing, ]) - @test MLJBase.transform(imp, f, vnew) == - vcat([filler, ], vpure[end-10:end], [filler, ]) - - vpure = rand(stable_rng, Int, 10) - v = vcat([missing, ], vpure) - filler = round(Int, median(vpure)) - imp = MLJModels.UnivariateFillImputer() - f, = MLJBase.fit(imp, 1, v) - vnew = [missing, 1, missing, 2, 3] - @test MLJBase.transform(imp, f, vnew) == [filler, 1, filler, 2, 3] - - @test_throws Exception MLJBase.transform(imp, f, [missing, "1", "2"]) - - @test_throws ArgumentError MLJBase.fit(imp, 1, [missing, "1", "2"]) - -end - -@testset "FillImputer" begin - X = ( - x = [missing,ones(10)...], - y = [missing,ones(10)...], - z = [missing,ones(10)...] - ) - - imp = FillImputer() - f, = MLJBase.fit(imp, 1, X) - - fp = MLJBase.fitted_params(imp, f) - @test fp.features_seen_in_fit == [:x, :y, :z] - @test fp.univariate_transformer == MLJModels.UnivariateFillImputer() - @test fp.filler_given_feature[:x] ≈ 1.0 - @test fp.filler_given_feature[:x] ≈ 1.0 - @test fp.filler_given_feature[:x] ≈ 1.0 - - Xnew = MLJBase.selectrows(X, 1:5) - Xt = MLJBase.transform(imp, f, Xnew) - @test all(.!ismissing.(Xt.x)) - @test Xt.x isa Vector{Float64} # no missing - @test all(Xt.x .== 1.0) - - imp = FillImputer(features=[:x,:y]) - f, = MLJBase.fit(imp, 1, X) - Xt = MLJBase.transform(imp, f, Xnew) - @test all(Xt.x .== 1.0) - @test all(Xt.y .== 1.0) - @test ismissing(Xt.z[1]) - - # adding a new feature not seen in fit: - Xnew = (x = X.x, y=X.y, a=X.x) - @test_throws ArgumentError MLJBase.transform(imp, f, Xnew) - - # mixture of features: - X = (x = categorical([missing, missing, missing, missing, - "Old", "Young", "Middle", "Young", - "Old", "Young", "Middle", "Young"]), - y = [missing, ones(11)...], - z = [missing, missing, 1,1,1,1,1,5,1,1,1,1], - a = rand("abc", 12)) - - imp = FillImputer() - f, = MLJBase.fit(imp, 1, X) - Xnew = MLJBase.selectrows(X, 1:4) - Xt = MLJBase.transform(imp, f, Xnew) - - @test all(.!ismissing.(Xt.x)) - @test all(.!ismissing.(Xt.y)) - @test all(.!ismissing.(Xt.z)) - @test all(.!ismissing.(Xt.a)) - - @test Xt.x[1] == mode(skipmissing(X.x)) - @test Xt.y[1] == 1 - @test Xt.z[1] == 1 - - # user specifies a feature explicitly that's not supported: - imp = FillImputer(features=[:x, :a]) # :a of Unknown scitype not supported - @test_logs (:info, r"Feature a will not") MLJBase.fit(imp, 1, X) - -end - - -#### CONTINUOUS ENCODER #### - -@testset "Continuous encoder" begin - - X = (name = categorical(["Ben", "John", "Mary", "John"], ordered=true), - height = [1.85, 1.67, 1.5, 1.67], - rubbish = ["a", "b", "c", "a"], - favourite_number = categorical([7, 5, 10, 5]), - age = [23, 23, 14, 23]) - - t = ContinuousEncoder() - f, _, _ = @test_logs((:info, r"Some.*dropped\:.*\:rubbish\]"), - MLJBase.fit(t, 1, X)) - - Xt = MLJBase.transform(t, f, X) - @test scitype(Xt) <: MLJBase.Table(MLJBase.Continuous) - s = MLJBase.schema(Xt) - @test s.names == (:name, :height, :favourite_number__5, - :favourite_number__7, :favourite_number__10, :age) - - t = ContinuousEncoder(drop_last=true, one_hot_ordered_factors=true) - f, _, r = MLJBase.fit(t, 0, X) - Xt = MLJBase.transform(t, f, X) - @test scitype(Xt) <: MLJBase.Table(MLJBase.Continuous) - s = MLJBase.schema(Xt) - @test s.names == (:name__Ben, :name__John, :height, :favourite_number__5, - :favourite_number__7, :age) - -end - -#### INTERACTION TRANSFORMER #### - -@testset "Interaction Transformer functions" begin - # No 
column provided, A has scitype Continuous, B has scitype Count - table = (A = [1., 2., 3.], B = [4, 5, 6], C = ["x₁", "x₂", "x₃"]) - @test MLJModels.actualfeatures(nothing, table) == (:A, :B) - # Column provided - @test MLJModels.actualfeatures([:A, :B], table) == (:A, :B) - # Column provided, not in table - @test_throws ArgumentError("Column(s) D are not in the dataset.") MLJModels.actualfeatures([:A, :D], table) - # Non Infinite scitype column provided - @test_throws ArgumentError("Column C's scitype is not Infinite.") MLJModels.actualfeatures([:A, :C], table) -end - - -@testset "Interaction Transformer" begin - # Check constructor sanity checks: order > 1, length(features) > 1 - @test_logs (:warn, "Constraint `model.order > 1` failed; using default: order=2.") InteractionTransformer(order = 1) - @test_logs (:warn, "Constraint `if model.features !== nothing\n"* - " length(model.features) > 1\nelse\n true\nend` failed; "* - "using default: features=nothing.") InteractionTransformer(features = [:A]) - - X = (A = [1, 2, 3], B = [4, 5, 6], C = [7, 8, 9]) - # Default order=2, features=nothing, ie all columns - Xt = MLJBase.transform(InteractionTransformer(), nothing, X) - @test Xt == ( - A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - A_B = [4, 10, 18], - A_C = [7, 16, 27], - B_C = [28, 40, 54] - ) - # order=3, features=nothing, ie all columns - Xt = MLJBase.transform(InteractionTransformer(order=3), nothing, X) - @test Xt == ( - A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - A_B = [4, 10, 18], - A_C = [7, 16, 27], - B_C = [28, 40, 54], - A_B_C = [28, 80, 162] - ) - # order=2, features=[:A, :B], ie all columns - Xt =MLJBase.transform(InteractionTransformer(order=2, features=[:A, :B]), nothing, X) - @test Xt == ( - A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - A_B = [4, 10, 18] - ) - # order=3, features=[:A, :B, :C], some non continuous columns - X = merge(X, (D = ["x₁", "x₂", "x₃"],)) - Xt = MLJBase.transform(InteractionTransformer(order=3, features=[:A, :B, :C]), nothing, X) - @test Xt == ( - A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - D = ["x₁", "x₂", "x₃"], - A_B = [4, 10, 18], - A_C = [7, 16, 27], - B_C = [28, 40, 54], - A_B_C = [28, 80, 162] - ) - # order=2, features=nothing, only continuous columns are dealt with - Xt = MLJBase.transform(InteractionTransformer(order=2), nothing, X) - @test Xt == ( - A = [1, 2, 3], - B = [4, 5, 6], - C = [7, 8, 9], - D = ["x₁", "x₂", "x₃"], - A_B = [4, 10, 18], - A_C = [7, 16, 27], - B_C = [28, 40, 54], - ) -end - -end -true diff --git a/test/model_search.jl b/test/model_search.jl index 87b03dd..24eeefe 100644 --- a/test/model_search.jl +++ b/test/model_search.jl @@ -1,7 +1,7 @@ module TestModelSearch using Test -using MLJModels +using MLJModels, MLJTransforms using MLJBase using ScientificTypes using Markdown @@ -36,7 +36,7 @@ tree = info("DecisionTreeRegressor", pkg="DecisionTree") # Note that these tests assume model registry metadata is up to date # with the latest trait values in `src/builtins/`: @test info(ConstantRegressor) == cnst - @test info(Standardizer()) == info("Standardizer", pkg="MLJModels") + @test info(Standardizer()) == info("Standardizer", pkg="MLJTransforms") @test doc("ConstantRegressor", pkg="MLJModels") == cnst.docstring |> Markdown.parse @test_throws MLJModels.ERR_DOC_EXPECTS_STRING doc(ConstantRegressor) @test_throws MLJModels.ERR_DOC_EXPECTS_STRING doc(ConstantRegressor()) diff --git a/test/runtests.jl b/test/runtests.jl index 05fea36..b4ebfd9 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,6 
 import Pkg
 
-using Test, MLJModels
+using Test, MLJModels, MLJTransforms
 
 @testset "metadata" begin
     @testset "metadata.jl" begin
@@ -18,9 +18,6 @@ end
     @testset "Constant.jl" begin
         @test include("builtins/Constant.jl")
     end
-    @testset "Transformers.jl" begin
-        @test include("builtins/Transformers.jl")
-    end
     @testset "ThresholdPredictors" begin
         @test include("builtins/ThresholdPredictors.jl")
     end

From 54e0dc9757a9e07f4d1d840532c07f3c568e0ede Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Fri, 22 Aug 2025 14:20:09 +1200
Subject: [PATCH 17/22] add MLJTransforms to model registry Project.toml and update registry

---
 src/registry/Metadata.toml | 782 ++++++++++++++++++++++++++++++++-----
 src/registry/Project.toml  |   1 +
 2 files changed, 688 insertions(+), 95 deletions(-)

diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml
index 12e4272..e51e397 100644
--- a/src/registry/Metadata.toml
+++ b/src/registry/Metadata.toml
@@ -739,9 +739,601 @@
 ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
 ":is_wrapper" = "`false`"
 
+[MLJTransforms.Standardizer]
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Union{Function, AbstractVector{Symbol}}\", \"Bool\", \"Bool\", \"Bool\")`"
+":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
+":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}}`"
+":output_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.Unsupervised`"
+":package_license" = "MIT"
+":prediction_type" = ":unknown"
+":load_path" = "MLJTransforms.Standardizer"
+":hyperparameters" = "`(:features, :ignore, :ordered_factor, :count)`"
+":is_pure_julia" = "`true`"
+":human_name" = "standardizer"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStandardizer = @load Standardizer pkg=unknown\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n   * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n   * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n   * function or other callable: standardize a feature if the callable returns `true` on its name. For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n   Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n     ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n     ordinal3 = [10.0, 20.0, 30.0],\n     ordinal4 = [-20.0, -30.0, -40.0],\n     nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names    │ scitypes         │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count            │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous       │\n│ ordinal4 │ Continuous       │\n│ nominal  │ Multiclass{3}    │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n"""
+":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl"
+":package_name" = "MLJTransforms"
+":name" = "Standardizer"
+":target_in_fit" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"]
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
+":reports_feature_importances" = "`false`"
+":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":is_wrapper" = "`false`"
+
+[MLJTransforms.UnivariateTimeTypeToContinuous]
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Union{Nothing, Dates.TimeType}\", \"Dates.Period\")`"
+":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":reporting_operations" = "`()`"
+":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`"
+":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.Unsupervised`"
+":package_license" = "MIT"
+":prediction_type" = ":unknown"
+":load_path" = "MLJTransforms.UnivariateTimeTypeToContinuous"
+":hyperparameters" = "`(:zero_time, :step)`"
+":is_pure_julia" = "`true`"
+":human_name" = "single variable transformer that creates continuous representations of temporally typed data"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=unknown\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n                                         step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142\n```\n"""
+":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`"
+":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl"
+":package_name" = "MLJTransforms"
+":name" = "UnivariateTimeTypeToContinuous"
+":target_in_fit" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":implemented_methods" = [":clean!", ":fit", ":transform"]
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
+":reports_feature_importances" = "`false`"
+":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`"
+":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`"
+":is_wrapper" = "`false`"
+
+[MLJTransforms.OneHotEncoder]
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Bool\", \"Bool\", \"Bool\")`"
+":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`"
+":output_scitype" = "`ScientificTypesBase.Table`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.Unsupervised`"
+":package_license" = "MIT"
+":prediction_type" = ":unknown"
+":load_path" = "MLJTransforms.OneHotEncoder"
+":hyperparameters" = "`(:features, :drop_last, :ordered_factor, :ignore)`"
+":is_pure_julia" = "`true`"
+":human_name" = "one-hot encoder"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneHotEncoder = @load OneHotEncoder pkg=unknown\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (feature names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=true`: whether to drop the column corresponding to the final class of encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n     grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n     height=[1.85, 1.67, 1.5, 1.67],\n     n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names     │ scitypes         │\n├───────────┼──────────────────┤\n│ name      │ Multiclass{4}    │\n│ grade     │ OrderedFactor{3} │\n│ height    │ Continuous       │\n│ n_devices │ Count            │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names        │ scitypes   │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John   │ Continuous │\n│ name__Lee    │ Continuous │\n│ grade__A     │ Continuous │\n│ grade__B     │ Continuous │\n│ height       │ Continuous │\n│ n_devices    │ Count      │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Table`"
+":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl"
+":package_name" = "MLJTransforms"
+":name" = "OneHotEncoder"
+":target_in_fit" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":implemented_methods" = [":fit", ":fitted_params", ":transform", ":OneHotEncoder"]
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
+":reports_feature_importances" = "`false`"
+":input_scitype" = "`ScientificTypesBase.Table`"
+":transform_scitype" = "`ScientificTypesBase.Table`"
+":is_wrapper" = "`false`"
+
+[MLJTransforms.ContinuousEncoder]
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Bool\", \"Bool\")`"
+":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":reporting_operations" = "`()`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`"
+":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.Unsupervised`"
+":package_license" = "MIT"
+":prediction_type" = ":unknown"
+":load_path" = "MLJTransforms.ContinuousEncoder"
+":hyperparameters" = "`(:drop_last, :one_hot_ordered_factors)`"
+":is_pure_julia" = "`true`"
+":human_name" = "continuous encoder"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`"
+":docstring" = """```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=unknown\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `one_hot_ordered_factors=true` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping features) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Features can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n     grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n     height=[1.85, 1.67, 1.5, 1.67],\n     n_devices=[3, 2, 4, 3],\n     comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names     │ scitypes         │\n├───────────┼──────────────────┤\n│ name      │ Multiclass{4}    │\n│ grade     │ OrderedFactor{3} │\n│ height    │ Continuous       │\n│ n_devices │ Count            │\n│ comments  │ Textual          │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names        │ scitypes   │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John   │ Continuous │\n│ name__Lee    │ Continuous │\n│ grade        │ Continuous │\n│ height       │ Continuous │\n│ n_devices    │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Table`"
+":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl"
+":package_name" = "MLJTransforms"
+":name" = "ContinuousEncoder"
+":target_in_fit" = "`false`"
+":supports_class_weights" = "`false`"
+":supports_online" = "`false`"
+":implemented_methods" = [":fit", ":fitted_params", ":transform", ":ContinuousEncoder"]
+":deep_properties" = "`()`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
+":supports_training_losses" = "`false`"
+":supports_weights" = "`false`"
+":reports_feature_importances" = "`false`"
+":input_scitype" = "`ScientificTypesBase.Table`"
+":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":is_wrapper" = "`false`"
+
+[MLJTransforms.FrequencyEncoder]
+":constructor" = "`nothing`"
+":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Bool\", \"Type\")`"
+":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`"
+":reporting_operations" = "`()`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`"
+":output_scitype" = "`ScientificTypesBase.Table`"
+":tags" = []
+":abstract_type" = "`MLJModelInterface.Unsupervised`"
+":package_license" = "unknown"
+":prediction_type" = ":unknown"
+":load_path" = "MLJTransforms.FrequencyEncoder"
+":hyperparameters" = "`(:features, :ignore, :ordered_factor, :normalize, :output_type)`"
+":is_pure_julia" = "`true`"
+":human_name" = "frequency encoder"
+":is_supervised" = "`false`"
+":iteration_parameter" = "`nothing`" +":docstring" = """```\nFrequencyEncoder\n```\n\nA model type for constructing a frequency encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFrequencyEncoder = @load FrequencyEncoder pkg=MLJTransforms\n```\n\nDo `model = FrequencyEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FrequencyEncoder(features=...)`.\n\n`FrequencyEncoder` implements frequency encoding which replaces the categorical values in the specified categorical features with their (normalized or raw) frequencies of occurrence in the dataset. \n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.\n * `output_type=Float32`: The type of the output values. The default is `Float32`, but you can set it to `Float64` or any other type that can hold the frequency values.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply frequency encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. 
Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = FrequencyEncoder(ordered_factor = false, normalize=true)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 2, 2],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [4, 4, 4, 1, 4],\n D = [3, 2, 3, 2, 3],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "FrequencyEncoder" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.TargetEncoder] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Real\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, ScientificTypesBase.Unknown}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.TargetEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :lambda, :m)`" +":is_pure_julia" = "`true`" +":human_name" = "target encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nTargetEncoder\n```\n\nA model type for constructing a target encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTargetEncoder = @load TargetEncoder pkg=MLJTransforms\n```\n\nDo `model = TargetEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `TargetEncoder(features=...)`.\n\n`TargetEncoder` implements target encoding as defined in [1] to encode categorical variables into continuous ones using statistics from the target variable.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous` or `Count` for regression problems and `Multiclass` or `OrderedFactor` for classification problems; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `λ`: Shrinkage hyperparameter used to mix between posterior and prior statistics as described in [1]\n * `m`: An integer hyperparameter to compute shrinkage as described in [1]. If `m=:auto` then m will be computed using\n\nempirical Bayes estimation as described in [1]\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply target encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `task`: Whether the task is `Classification` or `Regression`\n * `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical feature. 
It maps each level in each categorical feature to a statistic computed over the target.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Define the target variable \ny = [\"c1\", \"c2\", \"c3\", \"c1\", \"c2\",]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\ny = coerce(y, Multiclass)\n\nencoder = TargetEncoder(ordered_factor = false, lambda = 1.0, m = 0,)\nmach = fit!(machine(encoder, X, y))\nXnew = transform(mach, X)\n\njulia > schema(Xnew)\n┌───────┬──────────────────┬─────────────────────────────────┐\n│ names │ scitypes │ types │\n├───────┼──────────────────┼─────────────────────────────────┤\n│ A_1 │ Continuous │ Float64 │\n│ A_2 │ Continuous │ Float64 │\n│ A_3 │ Continuous │ Float64 │\n│ B │ Continuous │ Float64 │\n│ C_1 │ Continuous │ Float64 │\n│ C_2 │ Continuous │ Float64 │\n│ C_3 │ Continuous │ Float64 │\n│ D_1 │ Continuous │ Float64 │\n│ D_2 │ Continuous │ Float64 │\n│ D_3 │ Continuous │ Float64 │\n│ E │ OrderedFactor{5} │ CategoricalValue{Int64, UInt32} │\n└───────┴──────────────────┴─────────────────────────────────┘\n```\n\n# Reference\n\n[1] Micci-Barreca, Daniele. “A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems” SIGKDD Explor. Newsl. 
3, 1 (July 2001), 27–32.\n\nSee also [`OneHotEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "TargetEncoder" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.UnivariateBoxCoxTransformer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.UnivariateBoxCoxTransformer" +":hyperparameters" = "`(:n, :shift)`" +":is_pure_julia" = "`true`" +":human_name" = "single variable Box-Cox transformer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=unknown\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. 
Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `z` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateBoxCoxTransformer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateBoxCoxTransformer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":is_wrapper" = "`false`" + +[MLJTransforms.InteractionTransformer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Vector{Symbol}}\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Static`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.InteractionTransformer" +":hyperparameters" = "`(:order, :features)`" +":is_pure_julia" = "`true`" +":human_name" = "interaction transformer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = 
"""```\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=unknown\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interations generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" +":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "InteractionTransformer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.UnivariateDiscretizer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\",)`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing,)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.UnivariateDiscretizer" +":hyperparameters" = "`(:n_classes,)`" +":is_pure_julia" = "`true`" +":human_name" = "single variable discretizer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [unknown.jl](unknown), and 
implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=MLJTransforms\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitresult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateDiscretizer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" +":is_wrapper" = "`false`" + +[MLJTransforms.CardinalityReducer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Dict{T} where T<:Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = 
"`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.CardinalityReducer" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :min_frequency, :label_for_infrequent)`" +":is_pure_julia" = "`true`" +":human_name" = "cardinality reducer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nCardinalityReducer\n```\n\nA model type for constructing a cardinality reducer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCardinalityReducer = @load CardinalityReducer pkg=MLJTransforms\n```\n\nDo `model = CardinalityReducer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CardinalityReducer(features=...)`.\n\n`CardinalityReducer` maps any level of a categorical feature that occurs with frequency < `min_frequency` into a new level (e.g., \"Other\"). This is useful when some categorical features have high cardinality and many levels are infrequent. This assumes that the categorical features have raw types that are in `Union{AbstractString, Char, Number}`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `min_frequency::Real=3`: Any level of a categorical feature that occurs with frequency < `min_frequency` will be mapped to a new level. Could be\n\nan integer or a float which decides whether raw counts or normalized frequencies are used.\n\n * `label_for_infrequent::Dict{<:Type, <:Any}()= Dict( AbstractString => \"Other\", Char => 'O', )`: A\n\ndictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then the new value is `\"Other\"` and if the raw type subtypes `Char` then the new value is `'O'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `new_cat_given_col_val`: A dictionary that maps each level in a categorical feature to a new level (either itself or the new level specified in `label_for_infrequent`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define categorical features\nA = [ [\"a\" for i in 1:100]..., \"b\", \"b\", \"b\", \"c\", \"d\"]\nB = [ [0 for i in 1:100]..., 1, 2, 3, 4, 4]\n\n# Combine into a named tuple\nX = (A = A, B = B)\n\n# Coerce both A and B to Multiclass\nX = coerce(X,\n:A => Multiclass,\n:B => Multiclass\n)\n\nencoder = CardinalityReducer(ordered_factor = false, min_frequency=3)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> proportionmap(Xnew.A)\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, Float64} with 3 entries:\n \"Other\" => 0.0190476\n \"b\" => 0.0285714\n \"a\" => 0.952381\n\njulia> proportionmap(Xnew.B)\nDict{CategoricalArrays.CategoricalValue{Int64, UInt32}, Float64} with 2 entries:\n 0 => 0.952381\n -1 => 0.047619\n```\n\nSee also [`FrequencyEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "CardinalityReducer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.OrdinalEncoder] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.OrdinalEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :output_type)`" +":is_pure_julia" = "`true`" +":human_name" = "ordinal encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nOrdinalEncoder\n```\n\nA model type for constructing an ordinal encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOrdinalEncoder = @load OrdinalEncoder pkg=MLJTransforms\n```\n\nDo `model = OrdinalEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `OrdinalEncoder(features=...)`.\n\n`OrdinalEncoder` implements ordinal encoding, which replaces the categorical values in the specified categorical features with integers (ordered arbitrarily). This creates an implicit ordering between categories, which may not be a proper modelling assumption.\n\n# Training data\n\nIn MLJ (or MLJBase), bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features, given as symbols, to exclude from or include in encoding, according to the value of `ignore`; alternatively, a single symbol (which is treated as a vector with one symbol), or a callable that returns `true` for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or ignore them\n * `output_type`: The numerical concrete type of the encoded features. Default is `Float32`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply ordinal encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical features of `X` into an integer.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",]\nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",]\nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercion:\nschema(X)\n\nencoder = OrdinalEncoder(ordered_factor = false)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n(A = [2, 1, 2, 3, 3],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [1, 1, 1, 2, 1],\n D = [2, 1, 2, 1, 2],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "OrdinalEncoder" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = 
"`false`" + +[MLJTransforms.FillImputer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.FillImputer" +":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`" +":is_pure_julia" = "`true`" +":human_name" = "fill imputer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=unknown\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose features each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. 
Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (its fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "FillImputer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":FillImputer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.MissingnessEncoder] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Dict{T} where T<:Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = 
"`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.MissingnessEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :label_for_missing)`" +":is_pure_julia" = "`true`" +":human_name" = "missingness encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMissingnessEncoder\n```\n\nA model type for constructing a missingness encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMissingnessEncoder = @load MissingnessEncoder pkg=MLJTransforms\n```\n\nDo `model = MissingnessEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MissingnessEncoder(features=...)`.\n\n`MissingnessEncoder` maps any missing level of a categorical feature into a new level (e.g., \"Missing\"). By this, missingness will be treated as a new level by any subsequent model. This assumes that the categorical features have raw types that are in `Char`, `AbstractString`, and `Number`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `label_for_missing::Dict{<:Type, <:Any}()= Dict( AbstractString => \"missing\", Char => 'm', )`: A\n\ndictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and where each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then missing values will be replaced with `\"missing\"` and if the raw type subtypes `Char` then the new value is `'m'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `label_for_missing_given_feature`: A dictionary that, for each column, maps `missing` into some value according to `label_for_missing`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define a table with missing values\nXm = (\n A = categorical([\"Ben\", \"John\", missing, missing, \"Mary\", \"John\", missing]),\n B = [1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = categorical([7, 5, missing, missing, 10, 0, missing]),\n D = [23, 23, 44, 66, 14, 23, 11],\n E = categorical([missing, 'g', 'r', missing, 'r', 'g', 'p'])\n)\n\nencoder = MissingnessEncoder()\nmach = fit!(machine(encoder, Xm))\nXnew = transform(mach, Xm)\n\njulia> Xnew\n(A = [\"Ben\", \"John\", \"missing\", \"missing\", \"Mary\", \"John\", \"missing\"],\n B = Union{Missing, Float64}[1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = [7, 5, -1, -1, 10, 0, -1],\n D = [23, 23, 44, 66, 14, 23, 11],\n E = ['m', 'g', 'r', 'm', 'r', 'g', 'p'],)\n\n```\n\nSee also [`CardinalityReducer`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "MissingnessEncoder" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.ContrastEncoder] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Symbol, AbstractVector{Symbol}}\", \"Any\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.ContrastEncoder" +":hyperparameters" = "`(:features, :ignore, :mode, :buildmatrix, :ordered_factor)`" +":is_pure_julia" = "`true`" +":human_name" = "contrast encoder" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nContrastEncoder\n```\n\nA model type for constructing a contrast encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContrastEncoder = @load ContrastEncoder pkg=MLJTransforms\n```\n\nDo `model = ContrastEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ContrastEncoder(features=...)`.\n\n`ContrastEncoder` implements the following contrast encoding methods for categorical features: dummy, sum, backward/forward difference, and Helmert coding. More generally, users can specify a custom contrast or hypothesis matrix, and each feature can be encoded using a different method.\n\n# Training data\n\nIn MLJ (or MLJBase), bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features, given as symbols, to exclude from or include in encoding, according to the value of `ignore`; alternatively, a single symbol (which is treated as a vector with one symbol), or a callable that returns `true` for features to be included/excluded\n * `mode=:dummy`: The type of encoding to use. Can be one of `:contrast`, `:dummy`, `:sum`, `:backward_diff`, `:forward_diff`, `:helmert` or `:hypothesis`. If `ignore=false` (features to be encoded are listed explicitly in `features`), then this can be a vector of the same length as `features`, specifying a different contrast encoding scheme for each feature\n * `buildmatrix=nothing`: A function or other callable with signature `buildmatrix(colname, k)`, where `colname` is the name of the feature and `k` is the number of its levels, and which returns a contrast or hypothesis matrix with row/column ordering consistent with the ordering of `levels(col)`. Only relevant if `mode` is `:contrast` or `:hypothesis`.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or ignore them\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply contrast encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vector_given_value_given_feature`: A dictionary that maps each level of each encoded categorical feature of `X` into the corresponding contrast vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical dataset\nX = (\n name = categorical([\"Ben\", \"John\", \"Mary\", \"John\"]),\n height = [1.85, 1.67, 1.5, 1.67],\n favnum = categorical([7, 5, 10, 1]),\n age = [23, 23, 14, 23],\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = ContrastEncoder(\n features = [:name, :favnum],\n ignore = false,\n mode = [:dummy, :helmert],\n)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n(name_John = [1.0, 0.0, 0.0, 0.0],\n name_Mary = [0.0, 1.0, 0.0, 1.0],\n height = [1.85, 1.67, 1.5, 1.67],\n favnum_5 = [0.0, 1.0, 0.0, -1.0],\n favnum_7 = [2.0, -1.0, 0.0, -1.0],\n favnum_10 = [-1.0, -1.0, 3.0, -1.0],\n age = [23, 23, 14, 23],)\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "ContrastEncoder" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" +":is_wrapper" = "`false`" + +[MLJTransforms.UnivariateStandardizer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`()`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`()`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Infinite}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.UnivariateStandardizer" +":hyperparameters" = "`()`" +":is_pure_julia" = "`true`" +":human_name" = "single variable standardizer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nUnivariateStandardizer()\n```\n\nTransformer type for standardizing (whitening) single variable data.\n\nThis model may be deprecated in the future. 
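A minimal usage sketch (illustrative only; the training vector `x` below is hypothetical):\n\n```\nusing MLJ\n\nx = 10 .+ 2 .* randn(100)        # hypothetical training vector\nmach = machine(UnivariateStandardizer(), x)\nfit!(mach)\n\nz = transform(mach, x)           # standardized values (zero mean, unit std)\nx ≈ inverse_transform(mach, z)   # true: the transform is invertible\n```\n\n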
Consider using [`Standardizer`](@ref), which handles both tabular *and* univariate data.\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateStandardizer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":is_wrapper" = "`false`" + +[MLJTransforms.UnivariateFillImputer] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Function\", \"Function\", \"Function\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}}`" +":output_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.UnivariateFillImputer" +":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`" +":is_pure_julia" = "`true`" +":human_name" = "single variable fill imputer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateFillImputer = @load UnivariateFillImputer pkg=MLJTransforms\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to impute `missing` values in a vector with a fixed value learned from the non-missing values of the training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n""" +":inverse_transform_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateFillImputer" +":target_in_fit" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":UnivariateFillImputer"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, 
AbstractVector{<:ScientificTypesBase.Finite}}`" +":is_wrapper" = "`false`" + [CatBoost.CatBoostRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, PythonCall.Core.Py, String}\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Core.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Core.Py}\")`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, String, PythonCall.Py}\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Py}\")`" ":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" @@ -778,7 +1370,7 @@ [CatBoost.CatBoostClassifier] ":is_wrapper" = "`false`" 
-":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Int64}\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, PythonCall.Core.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Core.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Core.Py}\")`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Int64}\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Py}\")`" ":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" @@ -814,7 +1406,7 @@ ":constructor" = "`nothing`" [NearestNeighborModels.KNNClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", 
\"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -848,10 +1440,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [NearestNeighborModels.MultitargetKNNClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\", \"Type{<:Union{AbstractDict{<:AbstractString, <:AbstractVector}, AbstractDict{Symbol, <:AbstractVector}, NamedTuple{names, T} where {N, names, T<:NTuple{N, AbstractVector}}}}\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -885,10 +1477,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [NearestNeighborModels.MultitargetKNNRegressor] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -922,10 +1514,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [NearestNeighborModels.KNNRegressor] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -959,10 +1551,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJXGBoostInterface.XGBoostCount] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\", \"Union{Nothing, String}\")`" ":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -996,10 +1588,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJXGBoostInterface.XGBoostRegressor] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\", \"Union{Nothing, String}\")`" ":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1033,10 +1625,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJXGBoostInterface.XGBoostClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\", \"Union{Nothing, String}\")`" ":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1070,7 +1662,7 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJScikitLearnInterface.ProbabilisticSGDClassifier] ":constructor" = "`nothing`" @@ -3848,7 +4440,7 @@ ":is_wrapper" = "`false`" [OutlierDetectionNeighbors.ABODDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Bool\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3882,10 +4474,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionNeighbors.DNNDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Real\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3919,10 +4511,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionNeighbors.LOFDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3956,10 +4548,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionNeighbors.KNNDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Symbol\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3993,10 +4585,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [OutlierDetectionNeighbors.COFDetector] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" 
":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4030,10 +4622,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [SIRUS.StableRulesClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Float64\")`" ":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4067,10 +4659,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [SIRUS.StableForestClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\")`" ":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4104,10 +4696,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [SIRUS.StableRulesRegressor] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Float64\")`" ":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4141,10 +4733,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [SIRUS.StableForestRegressor] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\")`" ":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4178,7 +4770,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJIteration.IteratedModel] ":constructor" = "`IteratedModel`" @@ -4255,7 +4847,7 @@ 
":constructor" = "`nothing`" [PartitionedLS.PartLS] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Type{PartitionedLS.Alt}, Type{PartitionedLS.BnB}, Type{PartitionedLS.Opt}}\", \"Matrix{Int64}\", \"AbstractFloat\", \"AbstractFloat\", \"Int64\", \"Union{Nothing, Int64, Random.AbstractRNG}\")`" ":package_uuid" = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4289,7 +4881,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{AbstractVector{ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.QuantileRegressor] ":constructor" = "`nothing`" @@ -4662,7 +5254,7 @@ ":is_wrapper" = "`false`" [Maxnet.MaxnetBinaryClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{String, Vector{<:Maxnet.AbstractFeatureClass}}\", \"Float64\", \"Any\", \"Bool\", \"Integer\", \"Float64\", \"GLM.Link\", \"Bool\", \"Any\")`" ":package_uuid" = "81f79f80-22f2-4e41-ab86-00c11cf0f26f" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4696,10 +5288,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [ParallelKMeans.KMeans] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Union{Symbol, ParallelKMeans.AbstractKMeansAlg}\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Union{Int64, Random.AbstractRNG}\", \"Any\", \"Any\")`" ":package_uuid" = "42b8e9d4-006b-409a-8472-7f34b3fb58af" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4733,7 +5325,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJNaiveBayesInterface.GaussianNBClassifier] ":is_wrapper" = "`false`" @@ -5809,7 +6401,7 @@ ":constructor" = "`nothing`" [MLJTuning.TunedModel] -":constructor" = "`TunedModel`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"Union{MLJModelInterface.Probabilistic, MLJModelInterface.ProbabilisticSupervisedDetector, MLJModelInterface.ProbabilisticUnsupervisedDetector}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict}\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"ComputationalResources.AbstractResource\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Bool\", \"Bool\", \"Any\")`" ":package_uuid" = "03970b2e-30c4-11ea-3135-d1576263f10f" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5843,7 +6435,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" 
":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`TunedModel`" [FeatureSelection.FeatureSelector] ":constructor" = "`nothing`" @@ -6253,7 +6845,7 @@ ":constructor" = "`nothing`" [SymbolicRegression.MultitargetSRRegressor] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" ":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6287,10 +6879,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [SymbolicRegression.SRRegressor] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, 
AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" ":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6324,7 +6916,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [EvoTrees.EvoTreeClassifier] ":is_wrapper" = "`false`" @@ -7179,7 +7771,7 @@ ":constructor" = "`nothing`" [OneRule.OneRuleClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`()`" ":package_uuid" = "90484964-6d6a-4979-af09-8657dbed84ff" ":hyperparameter_ranges" = "`()`" @@ -7213,10 +7805,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.MCDDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -7250,10 +7842,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.COPODDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing,)`" @@ -7287,10 +7879,10 @@ 
":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.HBOSDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -7324,10 +7916,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.IForestDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7361,10 +7953,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.SOSDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"String\", \"Real\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -7398,10 +7990,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.ABODDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"String\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -7435,10 +8027,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.LOFDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\", \"Bool\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" 
":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7472,10 +8064,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.PCADetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"String\", \"Real\", \"Union{Integer, String}\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7509,10 +8101,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.INNEDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Union{Nothing, Integer}\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -7546,10 +8138,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.OCSVMDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Integer\", \"Union{Real, String}\", \"Real\", \"Real\", \"Real\", \"Bool\", \"Integer\", \"Bool\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7583,10 +8175,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.ECODDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing,)`" @@ -7620,10 +8212,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, 
AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.SODDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Real\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -7657,10 +8249,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.LODADetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -7694,10 +8286,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.KDEDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"String\", \"Integer\", \"String\", \"Any\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -7731,11 +8323,11 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.CDDetector] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"PythonCall.Core.Py\",)`" +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"PythonCall.Py\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" @@ -7768,10 +8360,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.KNNDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7805,10 +8397,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.GMMDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"Real\", \"Integer\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Bool\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7842,10 +8434,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.COFDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"String\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -7879,10 +8471,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.CBLOFDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7916,10 +8508,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.LOCIDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -7953,10 +8545,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.LMDDDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"String\", \"Union{Nothing, Integer}\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -7990,10 +8582,10 @@ ":reports_feature_importances" = "`false`" 
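Every `OutlierDetectionPython` entry in the hunks above carries the same transform scitype, `Tuple{AbstractVector{<:Continuous}, AbstractVector{<:Continuous}}`: for these detectors, `transform` returns a pair of raw outlier-score vectors, one for the training data and one for the data passed in. A minimal sketch of consuming one such entry, assuming MLJ and OutlierDetectionPython (with its PyOD backend) are installed; the data is made up for illustration:

```julia
# Sketch only: fit a detector listed above and destructure the score pair
# promised by its `:transform_scitype` trait.
using MLJ

KNNDetector = @load KNNDetector pkg=OutlierDetectionPython verbosity=0
X = table(rand(100, 3))            # toy continuous data

mach = machine(KNNDetector(), X)
fit!(mach)

# One score vector for the training data, one for the data passed here:
train_scores, test_scores = transform(mach, X)
```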
":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionPython.RODDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing,)`" @@ -8027,10 +8619,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [SelfOrganizingMaps.SelfOrganizingMap] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Symbol\", \"Symbol\", \"Symbol\", \"Symbol\", \"Distances.PreMetric\", \"Int64\")`" ":package_uuid" = "ba4b7379-301a-4be0-bee6-171e4e152787" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -8064,7 +8656,7 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [InteractiveUtils] diff --git a/src/registry/Project.toml b/src/registry/Project.toml index 5a11e7d..1e96fd3 100644 --- a/src/registry/Project.toml +++ b/src/registry/Project.toml @@ -25,6 +25,7 @@ MLJScikitLearnInterface = "5ae90465-5518-4432-b9d2-8a1def2f0cab" MLJTSVDInterface = "7fa162e1-0e29-41ca-a6fa-c000ca4e7e7e" MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd" MLJText = "5e27fcf9-6bac-46ba-8580-b5712f3d6387" +MLJTransforms = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f" MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" Maxnet = "81f79f80-22f2-4e41-ab86-00c11cf0f26f" From df7dbcf33b2373809f13dd0dafe993d7ca5bb4b7 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Fri, 22 Aug 2025 19:32:52 +1200 Subject: [PATCH 18/22] add MLJTransforms to the registry and update all registry metadata --- Project.toml | 14 +- src/registry/Metadata.toml | 4152 ++++++++++++-------------- test/builtins/ThresholdPredictors.jl | 2 +- 3 files changed, 1895 insertions(+), 2273 deletions(-) diff --git a/Project.toml b/Project.toml index ed3c499..53f394c 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" +MLJModelRegistryTools = "0a96183e-380b-4aa6-be10-c555140810f2" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" @@ -38,6 +39,7 @@ Distributions = "0.25" InteractiveUtils = "1" LinearAlgebra = "1" MLJModelInterface = "1.10" +MLJModelRegistryTools = "0.1.1" MLJTransforms = "0.1.1" Markdown = "1" OrderedCollections = "1.1" @@ -66,14 +68,4 @@ Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = [ - "Distributed", - "MLJBase", - "MLJDecisionTreeInterface", - "MLJMultivariateStatsInterface", - "MLJTransforms", - "Pkg", - "StableRNGs", - "Suppressor", - "Test", -] +test = ["Distributed", "MLJBase", "MLJDecisionTreeInterface", "MLJMultivariateStatsInterface", "MLJTransforms", "Pkg", "StableRNGs", "Suppressor", "Test"] diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index e51e397..05d3433 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -1,6 +1,6 @@ [BetaML.RandomForestRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Float64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -34,10 +34,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.GaussianMixtureImputer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -71,10 +71,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.RandomForestClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Float64\", 
\"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -108,10 +108,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.RandomForestImputer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Vector{Int64}\", \"Union{Nothing, Function}\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -145,10 +145,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.PerceptronClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -182,10 +182,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.AutoEncoder] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Int64}\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"BetaML.Api.AutoTuneMethod\", \"String\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -219,10 +219,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" ":transform_scitype" = "`AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.DecisionTreeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" 
":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -256,10 +256,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.PegasosClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Matrix{Float64}}\", \"Union{Nothing, Vector{Float64}}\", \"Function\", \"Float64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -293,47 +293,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[BetaML.NeuralNetworkRegressor] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" -":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "BetaML.Bmlj.NeuralNetworkRegressor" -":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`" -":is_pure_julia" = "`true`" -":human_name" = "neural network regressor" -":is_supervised" = "`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```julia\nmutable struct NeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. 
passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `fitting_info`]\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be be a *n-records* vector.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load NeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkRegressor\n\njulia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM());\nNeuralNetworkRegressor(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], \n loss = BetaML.Utils.squared_cost, \n dloss = BetaML.Utils.dsquared_cost, \n epochs = 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 30.7726\n 21.6 28.0811\n 34.7 31.3194\n ⋮ \n 23.9 30.9032\n 22.0 29.49\n 
11.9 27.2438\n```\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/sylvaticus/BetaML.jl" -":package_name" = "BetaML" -":name" = "NeuralNetworkRegressor" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":predict"] -":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" [BetaML.KMeansClusterer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Function\", \"String\", \"Union{Nothing, Matrix{Float64}}\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -367,10 +330,47 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":constructor" = "`nothing`" + +[BetaML.NeuralNetworkRegressor] ":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" +":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "BetaML.Bmlj.NeuralNetworkRegressor" +":hyperparameters" = "`(:layers, :loss, :dloss, :epochs, :batch_size, :opt_alg, :shuffle, :descr, :cb, :rng)`" +":is_pure_julia" = "`true`" +":human_name" = "neural network regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```julia\nmutable struct NeuralNetworkRegressor <: MLJModelInterface.Deterministic\n```\n\nA simple but flexible Feedforward Neural Network, from the Beta Machine Learning Toolkit (BetaML) for regression of a single dimensional target.\n\n# Parameters:\n\n * `layers`: Array of layer objects [def: `nothing`, i.e. basic network]. 
See `subtypes(BetaML.AbstractLayer)` for supported layers\n * `loss`: Loss (cost) function [def: `BetaML.squared_cost`]. Should always assume y and ŷ as matrices, even if the regression task is 1-D\n\n !!! warning\n If you change the parameter `loss`, you need to either provide its derivative on the parameter `dloss` or use autodiff with `dloss=nothing`.\n\n * `dloss`: Derivative of the loss function [def: `BetaML.dsquared_cost`, i.e. use the derivative of the squared cost]. Use `nothing` for autodiff.\n * `epochs`: Number of epochs, i.e. passages trough the whole training sample [def: `200`]\n * `batch_size`: Size of each individual batch [def: `16`]\n * `opt_alg`: The optimisation algorithm to update the gradient at each batch [def: `BetaML.ADAM()`]. See `subtypes(BetaML.OptimisationAlgorithm)` for supported optimizers\n * `shuffle`: Whether to randomly shuffle the data at each iteration (epoch) [def: `true`]\n * `descr`: An optional title and/or description for this model\n * `cb`: A call back function to provide information during training [def: `fitting_info`]\n * `rng`: Random Number Generator (see [`FIXEDSEED`](@ref)) [deafult: `Random.GLOBAL_RNG`]\n\n# Notes:\n\n * data must be numerical\n * the label should be be a *n-records* vector.\n\n# Example:\n\n```julia\njulia> using MLJ\n\njulia> X, y = @load_boston;\n\njulia> modelType = @load NeuralNetworkRegressor pkg = \"BetaML\" verbosity=0\nBetaML.Nn.NeuralNetworkRegressor\n\njulia> layers = [BetaML.DenseLayer(12,20,f=BetaML.relu),BetaML.DenseLayer(20,20,f=BetaML.relu),BetaML.DenseLayer(20,1,f=BetaML.relu)];\n\njulia> model = modelType(layers=layers,opt_alg=BetaML.ADAM());\nNeuralNetworkRegressor(\n layers = BetaML.Nn.AbstractLayer[BetaML.Nn.DenseLayer([-0.23249759178069676 -0.4125090172711131 … 0.41401934928739 -0.33017881111237535; -0.27912169279319965 0.270551221249931 … 0.19258414323473344 0.1703002982374256; … ; 0.31186742456482447 0.14776438287394805 … 0.3624993442655036 0.1438885872964824; 0.24363744610286758 -0.3221033024934767 … 0.14886090419299408 0.038411663101909355], [-0.42360286004241765, -0.34355377040029594, 0.11510963232946697, 0.29078650404397893, -0.04940236502546075, 0.05142849152316714, -0.177685375947775, 0.3857630523957018, -0.25454667127064756, -0.1726731848206195, 0.29832456225553444, -0.21138505291162835, -0.15763643112604903, -0.08477044513587562, -0.38436681165349196, 0.20538016429104916, -0.25008157754468335, 0.268681800562054, 0.10600581996650865, 0.4262194464325672], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.08534180387478185 0.19659398307677617 … -0.3413633217504578 -0.0484925247381256; 0.0024419192794883915 -0.14614102508129 … -0.21912059923003044 0.2680725396694708; … ; 0.25151545823147886 -0.27532269951606037 … 0.20739970895058063 0.2891938885916349; -0.1699020711688904 -0.1350423717084296 … 0.16947589410758873 0.3629006047373296], [0.2158116357688406, -0.3255582642532289, -0.057314442103850394, 0.29029696770539953, 0.24994080694366455, 0.3624239027782297, -0.30674318230919984, -0.3854738338935017, 0.10809721838554087, 0.16073511121016176, -0.005923262068960489, 0.3157147976348795, -0.10938918304264739, -0.24521229198853187, -0.307167732178712, 0.0808907777008302, -0.014577497150872254, -0.0011287181458157214, 0.07522282588658086, 0.043366500526073104], BetaML.Utils.relu, BetaML.Utils.drelu), BetaML.Nn.DenseLayer([-0.021367697115938555 -0.28326652172347155 … 0.05346175368370165 -0.26037328415871647], [-0.2313659199724562], BetaML.Utils.relu, BetaML.Utils.drelu)], \n loss = 
BetaML.Utils.squared_cost, \n dloss = BetaML.Utils.dsquared_cost, \n epochs = 100, \n batch_size = 32, \n opt_alg = BetaML.Nn.ADAM(BetaML.Nn.var\"#90#93\"(), 1.0, 0.9, 0.999, 1.0e-8, BetaML.Nn.Learnable[], BetaML.Nn.Learnable[]), \n shuffle = true, \n descr = \"\", \n cb = BetaML.Nn.fitting_info, \n rng = Random._GLOBAL_RNG())\n\njulia> mach = machine(model, X, y);\n\njulia> fit!(mach);\n\njulia> ŷ = predict(mach, X);\n\njulia> hcat(y,ŷ)\n506×2 Matrix{Float64}:\n 24.0 30.7726\n 21.6 28.0811\n 34.7 31.3194\n ⋮ \n 23.9 30.9032\n 22.0 29.49\n 11.9 27.2438\n```\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/sylvaticus/BetaML.jl" +":package_name" = "BetaML" +":name" = "NeuralNetworkRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":fit", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":target_scitype" = "`AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" [BetaML.MultitargetGaussianMixtureRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -404,10 +404,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.GaussianMixtureRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Vector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -441,10 +441,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Infinite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.MultitargetNeuralNetworkRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", 
\"Bool\", \"String\", \"Function\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -478,10 +478,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.DecisionTreeClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Function\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -515,10 +515,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.GeneralImputer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{String, Vector{Int64}}\", \"Any\", \"Union{Bool, Vector{Bool}}\", \"Union{Function, Vector{Function}}\", \"Union{Function, Vector{Function}}\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -552,10 +552,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Known}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Known}}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Known}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.NeuralNetworkClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Vector{BetaML.Nn.AbstractLayer}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Function}\", \"Int64\", \"Int64\", \"BetaML.Nn.OptimisationAlgorithm\", \"Bool\", \"String\", \"Function\", \"Union{Nothing, Vector}\", \"String\", \"Any\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -589,10 +589,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}, AbstractMatrix{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.SimpleImputer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Function\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -626,10 +626,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.GaussianMixtureClusterer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"AbstractVector{Float64}\", \"Union{Type, Vector{<:BetaML.GMM.AbstractMixture}}\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Int64\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -663,10 +663,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{Missing, ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`AbstractArray{<:ScientificTypesBase.Multiclass}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.KernelPerceptronClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Function\", \"Int64\", \"Union{Nothing, Vector{Vector{Int64}}}\", \"Bool\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -700,10 +700,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Infinite}}, AbstractMatrix{<:ScientificTypesBase.Infinite}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [BetaML.KMedoidsClusterer] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Function\", \"String\", \"Union{Nothing, Matrix{Float64}}\", \"Random.AbstractRNG\")`" ":package_uuid" = "024491cd-cc6b-443e-8034-08ea7eb7db2b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -737,642 +737,87 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[MLJTransforms.Standardizer] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Union{Function, AbstractVector{Symbol}}\", \"Bool\", \"Bool\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" + +[MLJEnsembles.EnsembleModel] +":constructor" = "`EnsembleModel`" +":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Vector{Float64}\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Int64\", \"ComputationalResources.AbstractResource\", \"Any\")`" +":package_uuid" = "50ed68f4-41fd-4504-931a-ed422449fee0" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`Union{ScientificTypesBase.Table, 
AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.Standardizer" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :count)`" -":is_pure_julia" = "`true`" -":human_name" = "standardizer" -":is_supervised" = "`false`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "unknown" +":prediction_type" = ":probabilistic" +":load_path" = "MLJEnsembles.EnsembleModel" +":hyperparameters" = "`(:model, :atomic_weights, :bagging_fraction, :rng, :n, :acceleration, :out_of_bag_measure)`" +":is_pure_julia" = "`false`" +":human_name" = "probabilistic ensemble model" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStandardizer = @load Standardizer pkg=unknown\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. 
For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names │ scitypes │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous │\n│ ordinal4 │ Continuous │\n│ nominal │ Multiclass{3} │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n""" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "Standardizer" -":target_in_fit" = "`false`" +":docstring" = """```\nEnsembleModel(model,\n atomic_weights=Float64[],\n bagging_fraction=0.8,\n n=100,\n rng=GLOBAL_RNG,\n acceleration=CPU1(),\n out_of_bag_measure=[])\n```\n\nCreate a model for training an ensemble of `n` clones of `model`, with optional bagging. 
Ensembling is useful if `fit!(machine(atom, data...))` does not create identical models on repeated calls (ie, is a stochastic model, such as a decision tree with randomized node selection criteria), or if `bagging_fraction` is set to a value less than 1.0, or both.\n\nHere the atomic `model` must support targets with scitype `AbstractVector{<:Finite}` (single-target classifiers) or `AbstractVector{<:Continuous}` (single-target regressors).\n\nIf `rng` is an integer, then `MersenneTwister(rng)` is the random number generator used for bagging. Otherwise some `AbstractRNG` object is expected.\n\nThe atomic predictions are optionally weighted according to the vector `atomic_weights` (to allow for external optimization) except in the case that `model` is a `Deterministic` classifier, in which case `atomic_weights` are ignored.\n\nThe ensemble model is `Deterministic` or `Probabilistic`, according to the corresponding supertype of `atom`. In the case of deterministic classifiers (`target_scitype(atom) <: AbstractVector{<:Finite}`), the predictions are majority votes, and for regressors (`target_scitype(atom) <: AbstractVector{<:Continuous}`) they are ordinary averages. Probabilistic predictions are obtained by averaging the atomic probability distribution/mass functions; in particular, for regressors, the ensemble prediction on each input pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl package, where `D` is the type of predicted distribution for `atom`.\n\nSpecify `acceleration=CPUProcesses()` for distributed computing, or `CPUThreads()` for multithreading.\n\nIf a single measure or non-empty vector of measures is specified by `out_of_bag_measure`, then out-of-bag estimates of performance are written to the training report (call `report` on the trained machine wrapping the ensemble model).\n\n*Important:* If per-observation or class weights `w` (not to be confused with atomic weights) are specified when constructing a machine for the ensemble model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is used by any measures specified in `out_of_bag_measure` that support them.\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJEnsembles.jl" +":package_name" = "MLJEnsembles" +":name" = "EnsembleModel" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" +":implemented_methods" = [] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`true`" -[CatBoost.CatBoostRegressor] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", 
\"Float64\", \"Float64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, String, PythonCall.Py}\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Py}\")`" +":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateTimeTypeToContinuous" -":hyperparameters" = "`(:zero_time, :step)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable transformer that creates continuous representations of temporally typed data" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=unknown\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "UnivariateTimeTypeToContinuous" -":target_in_fit" = "`false`" +":prediction_type" = ":deterministic" +":load_path" = "CatBoost.MLJCatBoostInterface.CatBoostRegressor" +":hyperparameters" = "`(:iterations, :learning_rate, :depth, :l2_leaf_reg, :model_size_reg, :rsm, :loss_function, :border_count, :feature_border_type, :per_float_feature_quantization, :input_borders, :output_borders, :fold_permutation_block, :nan_mode, :counter_calc_method, :leaf_estimation_iterations, :leaf_estimation_method, :thread_count, :random_seed, :metric_period, :ctr_leaf_count_limit, :store_all_simple_ctr, :max_ctr_complexity, :has_time, :allow_const_label, :target_border, :one_hot_max_size, :random_strength, :custom_metric, :bagging_temperature, :fold_len_multiplier, :used_ram_limit, :gpu_ram_part, :pinned_memory_size, :allow_writing_files, :approx_on_full_history, :boosting_type, :simple_ctr, :combinations_ctr, :per_feature_ctr, :ctr_target_border_count, :task_type, :devices, :bootstrap_type, :subsample, :sampling_frequency, :sampling_unit, :gpu_cat_features_storage, :data_partition, :early_stopping_rounds, :grow_policy, :min_data_in_leaf, :max_leaves, :leaf_estimation_backtracking, :feature_weights, :penalties_coefficient, :model_shrink_rate, :model_shrink_mode, :langevin, :diffusion_temperature, :posterior_sampling, :boost_from_average, :text_processing)`" +":is_pure_julia" = "`false`" +":human_name" = "CatBoost regressor" +":is_supervised" = "`true`" +":iteration_parameter" = ":iterations" +":docstring" = """```\nCatBoostRegressor\n```\n\nA model type for constructing a CatBoost regressor, based on [CatBoost.jl](https://github.com/JuliaAI/CatBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCatBoostRegressor = @load CatBoostRegressor pkg=CatBoost\n```\n\nDo `model = CatBoostRegressor()` to 
construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CatBoostRegressor(iterations=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, `Finite`, `Textual`; check column scitypes with `schema(X)`. `Textual` columns will be passed to catboost as `text_features`, `Multiclass` columns will be passed to catboost as `cat_features`, and `OrderedFactor` columns will be converted to integers.\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nFor more details on the catboost hyperparameters, see the Python docs: https://catboost.ai/en/docs/concepts/python-reference_catboostclassifier#parameters\n\n# Operations\n\n * `predict(mach, Xnew)`: predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Accessor functions\n\n * `feature_importances(mach)`: return vector of feature importances, in the form of `feature::Symbol => importance::Real` pairs\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `model`: The Python CatBoostRegressor model\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `feature_importances`: Vector{Pair{Symbol, Float64}} of feature importances\n\n# Examples\n\n```\nusing CatBoost.MLJCatBoostInterface\nusing MLJ\n\nX = (\n duration = [1.5, 4.1, 5.0, 6.7], \n n_phone_calls = [4, 5, 6, 7], \n department = coerce([\"acc\", \"ops\", \"acc\", \"ops\"], Multiclass), \n)\ny = [2.0, 4.0, 6.0, 7.0]\n\nmodel = CatBoostRegressor(iterations=5)\nmach = machine(model, X, y)\nfit!(mach)\npreds = predict(mach, X)\n```\n\nSee also [catboost](https://github.com/catboost/catboost) and the unwrapped model type [`CatBoost.CatBoostRegressor`](@ref).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/JuliaAI/CatBoost.jl" ":package_name" = "CatBoost" ":name" = "CatBoostRegressor" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":update"] ":deep_properties" = "`()`" ":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" 
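The `MLJEnsembles.EnsembleModel` docstring added in the hunk above describes bagging, integer seeding of the RNG, and out-of-bag evaluation, but unlike the other entries in this hunk it ships no usage example. The following is a minimal sketch, not part of the registry metadata, assuming MLJ and DecisionTree.jl are installed; the model choice and hyper-parameter values are illustrative only.

```julia
# Illustrative sketch only; assumes MLJ and DecisionTree.jl are available.
using MLJ

# A stochastic atomic model, so ensembling is useful even without bagging:
Tree = @load DecisionTreeClassifier pkg=DecisionTree
atom = Tree()

# 50 clones of `atom`, each trained on an 80% bagged sample; an integer
# `rng` seeds a MersenneTwister, per the docstring above:
ensemble = EnsembleModel(
    model=atom,
    n=50,
    bagging_fraction=0.8,
    rng=123,
    out_of_bag_measure=[log_loss],
)

X, y = @load_iris
mach = machine(ensemble, X, y)
fit!(mach)

predict(mach, X)[1]   # averaged probabilistic (UnivariateFinite) prediction
report(mach)          # training report with the out-of-bag log_loss estimate
```

Since `atom` here is `Probabilistic`, this wrapper corresponds to the `":prediction_type" = ":probabilistic"` entry registered above, and its predictions are averages of the atomic probability mass functions.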
-[MLJTransforms.OneHotEncoder] +[CatBoost.CatBoostClassifier] ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Bool\", \"Bool\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.OneHotEncoder" -":hyperparameters" = "`(:features, :drop_last, :ordered_factor, :ignore)`" -":is_pure_julia" = "`true`" -":human_name" = "one-hot encoder" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneHotEncoder = @load OneHotEncoder pkg=unknown\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (feature names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=true`: whether to drop the column corresponding to the final class of encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade__A │ Continuous │\n│ grade__B │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Count │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "OneHotEncoder" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":OneHotEncoder"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.ContinuousEncoder] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.ContinuousEncoder" -":hyperparameters" = "`(:drop_last, :one_hot_ordered_factors)`" -":is_pure_julia" = "`true`" -":human_name" = "continuous encoder" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be 
imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=unknown\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (features) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `ordered_factors=false` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping features) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. features can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but two just features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped 
features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "ContinuousEncoder" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":ContinuousEncoder"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[MLJTransforms.FrequencyEncoder] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Bool\", \"Type\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.FrequencyEncoder" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :normalize, :output_type)`" -":is_pure_julia" = "`true`" -":human_name" = "frequency encoder" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nFrequencyEncoder\n```\n\nA model type for constructing a frequency encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFrequencyEncoder = @load FrequencyEncoder pkg=MLJTransforms\n```\n\nDo `model = FrequencyEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FrequencyEncoder(features=...)`.\n\n`FrequencyEncoder` implements frequency encoding which replaces the categorical values in the specified categorical features with their (normalized or raw) frequencies of occurrence in the dataset. \n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.\n * `output_type=Float32`: The type of the output values. 
The default is `Float32`, but you can set it to `Float64` or any other type that can hold the frequency values.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply frequency encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = FrequencyEncoder(ordered_factor = false, normalize=true)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (A = [2, 1, 2, 2, 2],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [4, 4, 4, 1, 4],\n D = [3, 2, 3, 2, 3],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "FrequencyEncoder" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.TargetEncoder] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Real\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, ScientificTypesBase.Unknown}`" -":output_scitype" = "`ScientificTypesBase.Table`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.TargetEncoder" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :lambda, :m)`" -":is_pure_julia" = "`true`" -":human_name" = "target encoder" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nTargetEncoder\n```\n\nA model type for constructing a target encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTargetEncoder = @load TargetEncoder 
pkg=MLJTransforms\n```\n\nDo `model = TargetEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TargetEncoder(features=...)`.\n\n`TargetEncoder` implements target encoding as defined in [1] to encode categorical variables into continuous ones using statistics from the target variable.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous` or `Count` for regression problems and `Multiclass` or `OrderedFactor` for classification problems; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `λ`: Shrinkage hyperparameter used to mix between posterior and prior statistics as described in [1]\n * `m`: An integer hyperparameter to compute shrinkage as described in [1]. If `m=:auto` then m will be computed using\n\nempirical Bayes estimation as described in [1]\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply target encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `task`: Whether the task is `Classification` or `Regression`\n * `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical feature. 
It maps each level in each categorical feature to a statistic computed over the target.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Define the target variable \ny = [\"c1\", \"c2\", \"c3\", \"c1\", \"c2\",]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\ny = coerce(y, Multiclass)\n\nencoder = TargetEncoder(ordered_factor = false, lambda = 1.0, m = 0,)\nmach = fit!(machine(encoder, X, y))\nXnew = transform(mach, X)\n\njulia > schema(Xnew)\n┌───────┬──────────────────┬─────────────────────────────────┐\n│ names │ scitypes │ types │\n├───────┼──────────────────┼─────────────────────────────────┤\n│ A_1 │ Continuous │ Float64 │\n│ A_2 │ Continuous │ Float64 │\n│ A_3 │ Continuous │ Float64 │\n│ B │ Continuous │ Float64 │\n│ C_1 │ Continuous │ Float64 │\n│ C_2 │ Continuous │ Float64 │\n│ C_3 │ Continuous │ Float64 │\n│ D_1 │ Continuous │ Float64 │\n│ D_2 │ Continuous │ Float64 │\n│ D_3 │ Continuous │ Float64 │\n│ E │ OrderedFactor{5} │ CategoricalValue{Int64, UInt32} │\n└───────┴──────────────────┴─────────────────────────────────┘\n```\n\n# Reference\n\n[1] Micci-Barreca, Daniele. “A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems” SIGKDD Explor. Newsl. 
3, 1 (July 2001), 27–32.\n\nSee also [`OneHotEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "TargetEncoder" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.UnivariateBoxCoxTransformer] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateBoxCoxTransformer" -":hyperparameters" = "`(:n, :shift)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable Box-Cox transformer" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=unknown\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. 
Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `z` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "UnivariateBoxCoxTransformer" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateBoxCoxTransformer"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":is_wrapper" = "`false`" - -[MLJTransforms.InteractionTransformer] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Vector{Symbol}}\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{}`" -":output_scitype" = "`ScientificTypesBase.Table`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Static`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.InteractionTransformer" -":hyperparameters" = "`(:order, :features)`" -":is_pure_julia" = "`true`" -":human_name" = "interaction transformer" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = 
"""```\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=unknown\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interations generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" -":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "InteractionTransformer" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.UnivariateDiscretizer] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\",)`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing,)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateDiscretizer" -":hyperparameters" = "`(:n_classes,)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable discretizer" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [unknown.jl](unknown), and 
implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=unknown\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitesult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "UnivariateDiscretizer" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" -":is_wrapper" = "`false`" - -[MLJTransforms.CardinalityReducer] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Dict{T} where T<:Type\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = 
"`(nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.CardinalityReducer" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :min_frequency, :label_for_infrequent)`" -":is_pure_julia" = "`true`" -":human_name" = "cardinality reducer" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nCardinalityReducer\n```\n\nA model type for constructing a cardinality reducer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCardinalityReducer = @load CardinalityReducer pkg=MLJTransforms\n```\n\nDo `model = CardinalityReducer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CardinalityReducer(features=...)`.\n\n`CardinalityReducer` maps any level of a categorical feature that occurs with frequency < `min_frequency` into a new level (e.g., \"Other\"). This is useful when some categorical features have high cardinality and many levels are infrequent. This assumes that the categorical features have raw types that are in `Union{AbstractString, Char, Number}`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n * `min_frequency::Real=3`: Any level of a categorical feature that occurs with frequency < `min_frequency` will be mapped to a new level. Could be\n\nan integer or a float which decides whether raw counts or normalized frequencies are used.\n\n * `label_for_infrequent::Dict{<:Type, <:Any}()= Dict( AbstractString => \"Other\", Char => 'O', )`: A\n\ndictionary where the possible values for keys are the types in `Char`, `AbstractString`, and `Number` and each value signifies the new level to map into given a column raw super type. By default, if the raw type of the column subtypes `AbstractString` then the new value is `\"Other\"` and if the raw type subtypes `Char` then the new value is `'O'` and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `new_cat_given_col_val`: A dictionary that maps each level in a categorical feature to a new level (either itself or the new level specified in `label_for_infrequent`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define categorical features\nA = [ [\"a\" for i in 1:100]..., \"b\", \"b\", \"b\", \"c\", \"d\"]\nB = [ [0 for i in 1:100]..., 1, 2, 3, 4, 4]\n\n# Combine into a named tuple\nX = (A = A, B = B)\n\n# Coerce both A and B to Multiclass\nX = coerce(X,\n:A => Multiclass,\n:B => Multiclass\n)\n\nencoder = CardinalityReducer(ordered_factor = false, min_frequency=3)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> proportionmap(Xnew.A)\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, Float64} with 3 entries:\n \"Other\" => 0.0190476\n \"b\" => 0.0285714\n \"a\" => 0.952381\n\njulia> proportionmap(Xnew.B)\nDict{CategoricalArrays.CategoricalValue{Int64, UInt32}, Float64} with 2 entries:\n 0 => 0.952381\n -1 => 0.047619\n```\n\nSee also [`FrequencyEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "CardinalityReducer" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.OrdinalEncoder] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Type\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.OrdinalEncoder" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :output_type)`" -":is_pure_julia" = "`true`" -":human_name" = "ordinal encoder" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nOrdinalEncoder\n```\n\nA model type for constructing an ordinal encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOrdinalEncoder = @load OrdinalEncoder pkg=MLJTransforms\n```\n\nDo `model = OrdinalEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `OrdinalEncoder(features=...)`.\n\n`OrdinalEncoder` implements ordinal encoding which replaces the categorical values in the specified categorical features with integers (ordered arbitrarily). This will create an implicit ordering between categories which may not be a proper modelling assumption.\n\n# Training data\n\nIn MLJ (or MLJBase), bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features, given as symbols, to exclude or include from encoding according to the value of `ignore`; or a single symbol (treated as a vector with one symbol); or a callable that returns `true` for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or ignore them\n * `output_type`: The numerical concrete type of the encoded features. Default is `Float32`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply ordinal encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical features of `X` into an integer.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercion:\nschema(X)\n\nencoder = OrdinalEncoder(ordered_factor = false)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n (A = [2, 1, 2, 3, 3],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [1, 1, 1, 2, 1],\n D = [2, 1, 2, 1, 2],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "OrdinalEncoder" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = 
"`false`" - -[MLJTransforms.FillImputer] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.FillImputer" -":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`" -":is_pure_julia" = "`true`" -":human_name" = "fill imputer" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=unknown\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose features each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. 
Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (its fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "FillImputer" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":FillImputer"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.MissingnessEncoder] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Dict{T} where T<:Type\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" -":tags" = [] -":abstract_type" = 
"`MLJModelInterface.Unsupervised`" -":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.MissingnessEncoder" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :label_for_missing)`" -":is_pure_julia" = "`true`" -":human_name" = "missingness encoder" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nMissingnessEncoder\n```\n\nA model type for constructing a missingness encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMissingnessEncoder = @load MissingnessEncoder pkg=MLJTransforms\n```\n\nDo `model = MissingnessEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MissingnessEncoder(features=...)`.\n\n`MissingnessEncoder` maps any missing level of a categorical feature into a new level (e.g., \"Missing\"). In this way, missingness is treated as a new level by any subsequent model. This assumes that the categorical features have raw types that are in `Char`, `AbstractString`, and `Number`.\n\n# Training data\n\nIn MLJ (or MLJBase), bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features, given as symbols, to exclude or include from encoding according to the value of `ignore`; or a single symbol (treated as a vector with one symbol); or a callable that returns `true` for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or ignore them\n * `label_for_missing::Dict{<:Type, <:Any} = Dict(AbstractString => \"missing\", Char => 'm')`: A dictionary whose possible keys are the types `Char`, `AbstractString`, and `Number`, and whose values signify the new level to map `missing` into, given the column's raw supertype. By default, if the raw type of the column subtypes `AbstractString`, missing values are replaced with `\"missing\"`; if it subtypes `Char`, the new value is `'m'`; and if it subtypes `Number`, the new value is the lowest value in the column minus 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply missingness encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `label_for_missing_given_feature`: A dictionary that, for each column, maps `missing` into some value according to `label_for_missing`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define a table with missing values\nXm = (\n A = categorical([\"Ben\", \"John\", missing, missing, \"Mary\", \"John\", missing]),\n B = [1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = categorical([7, 5, missing, missing, 10, 0, missing]),\n D = [23, 23, 44, 66, 14, 23, 11],\n E = categorical([missing, 'g', 'r', missing, 'r', 'g', 'p'])\n)\n\nencoder = MissingnessEncoder()\nmach = fit!(machine(encoder, Xm))\nXnew = transform(mach, Xm)\n\njulia> Xnew\n(A = [\"Ben\", \"John\", \"missing\", \"missing\", \"Mary\", \"John\", \"missing\"],\n B = Union{Missing, Float64}[1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = [7, 5, -1, -1, 10, 0, -1],\n D = [23, 23, 44, 66, 14, 23, 11],\n E = ['m', 'g', 'r', 'm', 'r', 'g', 'p'],)\n```\n\nSee also [`CardinalityReducer`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "MissingnessEncoder" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.ContrastEncoder] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Symbol, AbstractVector{Symbol}}\", \"Any\", \"Bool\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.ContrastEncoder" -":hyperparameters" = "`(:features, :ignore, :mode, :buildmatrix, :ordered_factor)`" -":is_pure_julia" = "`true`" -":human_name" = "contrast encoder" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nContrastEncoder\n```\n\nA model type for constructing a contrast encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContrastEncoder = @load ContrastEncoder pkg=MLJTransforms\n```\n\nDo `model = ContrastEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ContrastEncoder(features=...)`.\n\n`ContrastEncoder` implements the following contrast encoding methods for categorical features: dummy, sum, backward/forward difference, and Helmert coding. More generally, users can specify a custom contrast or hypothesis matrix, and each feature can be encoded using a different method.\n\n# Training data\n\nIn MLJ (or MLJBase), bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features, given as symbols, to exclude or include from encoding according to the value of `ignore`; or a single symbol (treated as a vector with one symbol); or a callable that returns `true` for features to be included/excluded\n * `mode=:dummy`: The type of encoding to use. Can be one of `:contrast`, `:dummy`, `:sum`, `:backward_diff`, `:forward_diff`, `:helmert` or `:hypothesis`. If `ignore=false` (features to be encoded are listed explicitly in `features`), then this can be a vector of the same length as `features`, specifying a different contrast encoding scheme for each feature\n * `buildmatrix=nothing`: A function or other callable with signature `buildmatrix(colname, k)`, where `colname` is the name of the feature and `k` is the number of its levels, and which returns a contrast or hypothesis matrix with row/column ordering consistent with the ordering of `levels(col)`. Only relevant if `mode` is `:contrast` or `:hypothesis`.\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or ignore them\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply contrast encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. 
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vector_given_value_given_feature`: A dictionary that maps each level for each column in a subset of the categorical features of `X` into its corresponding contrast vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical dataset\nX = (\n name = categorical([\"Ben\", \"John\", \"Mary\", \"John\"]),\n height = [1.85, 1.67, 1.5, 1.67],\n favnum = categorical([7, 5, 10, 1]),\n age = [23, 23, 14, 23],\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = ContrastEncoder(\n features = [:name, :favnum],\n ignore = false, \n mode = [:dummy, :helmert],\n)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n (name_John = [1.0, 0.0, 0.0, 0.0],\n name_Mary = [0.0, 1.0, 0.0, 1.0],\n height = [1.85, 1.67, 1.5, 1.67],\n favnum_5 = [0.0, 1.0, 0.0, -1.0],\n favnum_7 = [2.0, -1.0, 0.0, -1.0],\n favnum_10 = [-1.0, -1.0, 3.0, -1.0],\n age = [23, 23, 14, 23],)\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "ContrastEncoder" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" - -[MLJTransforms.UnivariateStandardizer] -":constructor" = "`nothing`" -":hyperparameter_types" = "`()`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`()`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Infinite}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateStandardizer" -":hyperparameters" = "`()`" -":is_pure_julia" = "`true`" -":human_name" = "single variable standardizer" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateStandardizer()\n```\n\nTransformer type for standardizing (whitening) single variable data.\n\nThis model may be deprecated in the future. 
Consider using [`Standardizer`](@ref), which handles both tabular *and* univariate data.\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "UnivariateStandardizer" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":is_wrapper" = "`false`" - -[MLJTransforms.UnivariateFillImputer] -":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Function\", \"Function\", \"Function\")`" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}}`" -":output_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJTransforms.UnivariateFillImputer" -":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable fill imputer" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateFillImputer = @load UnivariateFillImputer pkg=unknown\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to impute `missing` values in a vector with a fixed value learned from the non-missing values of the training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n""" -":inverse_transform_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" -":package_name" = "MLJTransforms" -":name" = "UnivariateFillImputer" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":UnivariateFillImputer"] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" -":transform_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, 
AbstractVector{<:ScientificTypesBase.Finite}}`" -":is_wrapper" = "`false`" - -[CatBoost.CatBoostRegressor] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, String, PythonCall.Py}\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Py}\")`" -":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "CatBoost.MLJCatBoostInterface.CatBoostRegressor" -":hyperparameters" = "`(:iterations, :learning_rate, :depth, :l2_leaf_reg, :model_size_reg, :rsm, :loss_function, :border_count, :feature_border_type, :per_float_feature_quantization, :input_borders, :output_borders, :fold_permutation_block, :nan_mode, :counter_calc_method, :leaf_estimation_iterations, :leaf_estimation_method, :thread_count, :random_seed, :metric_period, :ctr_leaf_count_limit, :store_all_simple_ctr, :max_ctr_complexity, :has_time, :allow_const_label, :target_border, :one_hot_max_size, :random_strength, :custom_metric, :bagging_temperature, :fold_len_multiplier, :used_ram_limit, :gpu_ram_part, :pinned_memory_size, :allow_writing_files, :approx_on_full_history, :boosting_type, :simple_ctr, :combinations_ctr, :per_feature_ctr, :ctr_target_border_count, :task_type, 
:devices, :bootstrap_type, :subsample, :sampling_frequency, :sampling_unit, :gpu_cat_features_storage, :data_partition, :early_stopping_rounds, :grow_policy, :min_data_in_leaf, :max_leaves, :leaf_estimation_backtracking, :feature_weights, :penalties_coefficient, :model_shrink_rate, :model_shrink_mode, :langevin, :diffusion_temperature, :posterior_sampling, :boost_from_average, :text_processing)`" -":is_pure_julia" = "`false`" -":human_name" = "CatBoost regressor" -":is_supervised" = "`true`" -":iteration_parameter" = ":iterations" -":docstring" = """```\nCatBoostRegressor\n```\n\nA model type for constructing a CatBoost regressor, based on [CatBoost.jl](https://github.com/JuliaAI/CatBoost.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCatBoostRegressor = @load CatBoostRegressor pkg=CatBoost\n```\n\nDo `model = CatBoostRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CatBoostRegressor(iterations=...)`.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, `Finite`, `Textual`; check column scitypes with `schema(X)`. `Textual` columns will be passed to catboost as `text_features`, `Multiclass` columns will be passed to catboost as `cat_features`, and `OrderedFactor` columns will be converted to integers.\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nFor more details on the CatBoost hyper-parameters, see the Python docs: https://catboost.ai/en/docs/concepts/python-reference_catboostclassifier#parameters\n\n# Operations\n\n * `predict(mach, Xnew)`: deterministic predictions of the target given new features `Xnew` having the same scitype as `X` above.\n\n# Accessor functions\n\n * `feature_importances(mach)`: return vector of feature importances, in the form of `feature::Symbol => importance::Real` pairs\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `model`: The Python CatBoostRegressor model\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `feature_importances`: Vector{Pair{Symbol, Float64}} of feature importances\n\n# Examples\n\n```\nusing CatBoost.MLJCatBoostInterface\nusing MLJ\n\nX = (\n duration = [1.5, 4.1, 5.0, 6.7], \n n_phone_calls = [4, 5, 6, 7], \n department = coerce([\"acc\", \"ops\", \"acc\", \"ops\"], Multiclass), \n)\ny = [2.0, 4.0, 6.0, 7.0]\n\nmodel = CatBoostRegressor(iterations=5)\nmach = machine(model, X, y)\nfit!(mach)\npreds = predict(mach, X)\n```\n\nSee also [catboost](https://github.com/catboost/catboost) and the unwrapped model type [`CatBoost.CatBoostRegressor`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/CatBoost.jl" -":package_name" = "CatBoost" -":name" = "CatBoostRegressor" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":reformat", ":selectrows", ":update"] -":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":target_scitype" = 
"`AbstractVector{<:ScientificTypesBase.Continuous}`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`true`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[CatBoost.CatBoostClassifier] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Int64}\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Py}\")`" -":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Int64\", \"String\", \"String\", \"Union{Nothing, Int64}\", \"Union{Nothing, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Bool\", \"Union{Nothing, Float64}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, Bool}\", \"Union{Nothing, Int64}\", \"Float64\", \"Float64\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, String}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, PythonCall.Py}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", \"Union{Nothing, String}\", 
\"Union{Nothing, Int64}\", \"String\", \"String\", \"String\", \"Union{Nothing, String}\", \"Union{Nothing, Int64}\", \"String\", \"Int64\", \"Int64\", \"String\", \"Union{Nothing, PythonCall.Py}\", \"Float64\", \"Union{Nothing, Float64}\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Union{Nothing, Bool}\", \"Union{Nothing, PythonCall.Py}\")`" +":package_uuid" = "e2e10f9a-a85d-4fa9-b6b2-639a32100a12" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -1403,10 +848,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [NearestNeighborModels.KNNClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1440,10 +885,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [NearestNeighborModels.MultitargetKNNClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\", \"Type{<:Union{AbstractDict{<:AbstractString, <:AbstractVector}, AbstractDict{Symbol, <:AbstractVector}, NamedTuple{names, T} where {N, names, T<:NTuple{N, AbstractVector}}}}\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1477,10 +922,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [NearestNeighborModels.MultitargetKNNRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" 
":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1514,10 +959,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [NearestNeighborModels.KNNRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Distances.Metric\", \"Int64\", \"Bool\", \"NearestNeighborModels.KNNKernel\")`" ":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1551,10 +996,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJXGBoostInterface.XGBoostCount] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\", \"Union{Nothing, String}\")`" ":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1588,10 +1033,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJXGBoostInterface.XGBoostRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\", \"Union{Nothing, String}\")`" ":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" ":hyperparameter_ranges" = "`(nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1625,10 +1070,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJXGBoostInterface.XGBoostClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"String\", \"Union{Bool, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"String\", \"Float64\", \"Float64\", \"Union{Nothing, String}\", \"Union{Bool, Int64}\", \"String\", \"String\", \"Int64\", \"Int64\", \"String\", \"String\", \"String\", \"Float64\", \"Union{Bool, Int64}\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Any\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Bool\", \"Vector{String}\", \"Union{Nothing, String}\")`" ":package_uuid" = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1662,10 +1107,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ProbabilisticSGDClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1699,10 +1144,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeCVClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"AbstractArray{Float64}\", \"Bool\", \"Any\", \"Int64\", \"Any\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1736,10 +1181,10 @@ ":reports_feature_importances" 
= "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LogisticClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Bool\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Any\", \"Any\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Union{Nothing, Int64}\", \"Union{Nothing, Float64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1773,10 +1218,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RandomForestRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1810,10 +1255,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ElasticNetCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1847,10 +1292,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.PerceptronClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, String}\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing)`" @@ -1884,10 +1329,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskLassoRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1921,10 +1366,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LinearRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -1958,10 +1403,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HDBSCAN] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Union{Nothing, Int64}\", \"String\", \"Float64\", \"String\", \"Int64\", \"String\", \"Bool\", \"Union{Nothing, String}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -1995,10 +1440,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.DBSCAN] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Union{Nothing, Float64}\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2032,10 +1477,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2069,10 +1514,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = 
"`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoLarsICRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2106,10 +1551,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ARDRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Bool\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2143,10 +1588,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMNuRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Any\", \"Float64\", \"Int64\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2180,10 +1625,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2217,10 +1662,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SGDRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Union{Bool, Int64}\", \"Float64\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Union{Bool, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing)`" @@ -2254,10 +1699,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ComplementNBClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Nothing, AbstractVector}\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -2291,10 +1736,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HuberRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2328,10 +1773,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMNuClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2365,10 +1810,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GradientBoostingClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\", \"Union{Nothing, Float64, Int64, String}\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2402,10 +1847,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GaussianProcessRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Union{Float64, AbstractArray}\", \"Any\", \"Int64\", \"Bool\", \"Bool\", \"Any\")`" ":package_uuid" = 
"3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2439,10 +1884,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMLinearRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Float64\", \"String\", \"Bool\", \"Float64\", \"Bool\", \"Any\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2476,10 +1921,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LarsRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2513,10 +1958,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MeanShift] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Float64}\", \"Union{Nothing, AbstractArray}\", \"Bool\", \"Int64\", \"Bool\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2550,10 +1995,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HistGradientBoostingClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Any\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2587,10 +2032,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AdaBoostRegressor] -":constructor" = 
"`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Float64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -2624,10 +2069,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AffinityPropagation] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Int64\", \"Bool\", \"Any\", \"String\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2661,10 +2106,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskLassoCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Any\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2698,10 +2143,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.OrthogonalMatchingPursuitRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, Int64}\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -2735,10 +2180,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BernoulliNBClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Union{Nothing, Float64}\", \"Bool\", \"Union{Nothing, AbstractVector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -2772,10 +2217,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.PassiveAggressiveClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Any\", \"Bool\", \"Any\", 
\"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2809,10 +2254,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RidgeCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Any\", \"Any\", \"Union{Nothing, String}\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2846,10 +2291,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Any\", \"Int64\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2883,10 +2328,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GaussianNBClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, AbstractVector{Float64}}\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -2920,10 +2365,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ExtraTreesClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2957,10 +2402,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.KMeans] -":constructor" 
= "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Int64, String}\", \"Int64\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"String\", \"Union{String, AbstractArray}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -2994,10 +2439,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskElasticNetCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Vector{Float64}}\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Int64\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3031,10 +2476,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoLarsCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3068,10 +2513,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.OrthogonalMatchingPursuitCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Union{Nothing, Int64}\", \"Union{Bool, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3105,10 +2550,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AdaBoostClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Float64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -3142,10 +2587,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" 
+":constructor" = "`nothing`" [MLJScikitLearnInterface.PassiveAggressiveRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Bool\", \"Union{Bool, Int64}\", \"String\", \"Float64\", \"Any\", \"Bool\", \"Union{Bool, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3179,10 +2624,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BayesianRidgeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3216,10 +2661,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GaussianProcessClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Int64\", \"Bool\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3253,10 +2698,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BaggingClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3290,10 +2735,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.OPTICS] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Float64, Int64}\", \"Float64\", \"String\", \"Int64\", \"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Bool\", \"Union{Nothing, Float64, Int64}\", \"String\", \"Int64\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3327,10 +2772,10 @@ 
":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RANSACRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Union{Float64, Int64}\", \"Union{Nothing, Float64}\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Function, String}\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3364,10 +2809,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.KNeighborsRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Function, String}\", \"String\", \"Int64\", \"Int64\", \"Any\", \"Any\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3401,10 +2846,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.HistGradientBoostingRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Union{Nothing, Float64}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Union{Nothing, Vector}\", \"Union{Nothing, Dict, Vector}\", \"Any\", \"Bool\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Int64}\", \"Float64\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3438,10 +2883,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MiniBatchKMeans] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Union{Int64, String}\", \"Union{String, AbstractArray}\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3475,10 +2920,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Any\", \"Bool\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Union{Bool, Int64}\", \"Union{Nothing, Int64}\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3512,10 +2957,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.DummyRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Any\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -3549,10 +2994,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BisectingKMeans] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"String\", \"Union{String, AbstractArray}\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3586,10 +3031,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoLarsRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Bool, Int64}\", \"Union{Bool, String, AbstractMatrix}\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3623,10 +3068,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LarsCVRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Union{Bool, Int64}\", \"Int64\", \"Union{Bool, String, AbstractMatrix}\", \"Any\", \"Int64\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3660,10 +3105,10 @@ ":reports_feature_importances" = "`true`" 
":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.KNeighborsClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Function, String}\", \"String\", \"Int64\", \"Int64\", \"Any\", \"Any\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3697,10 +3142,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMLinearClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Bool\", \"Float64\", \"Float64\", \"String\", \"Bool\", \"Float64\", \"Any\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3734,10 +3179,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.FeatureAgglomeration] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Any\", \"Any\", \"Any\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3771,10 +3216,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.DummyClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Any\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -3808,10 +3253,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BaggingRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Int64\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3845,10 +3290,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" 
":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BayesianQDA] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Nothing, AbstractVector}\", \"Float64\", \"Bool\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -3882,10 +3327,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.BayesianLDA] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Union{Nothing, Float64, String}\", \"Union{Nothing, AbstractVector}\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3919,10 +3364,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SGDClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Int64\", \"Union{Nothing, Float64}\", \"Bool\", \"Int64\", \"Float64\", \"Union{Nothing, Int64}\", \"Any\", \"String\", \"Float64\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Any\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3956,10 +3401,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.TheilSenRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"Int64\", \"Float64\", \"Any\", \"Union{Nothing, Int64}\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -3993,10 +3438,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SpectralClustering] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, String}\", \"Any\", \"Int64\", \"Float64\", \"String\", \"Int64\", \"Float64\", \"String\", \"Union{Nothing, Int64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4030,10 
+3475,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.Birch] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -4067,10 +3512,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.AgglomerativeClustering] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Any\", \"Any\", \"Union{Bool, String}\", \"String\", \"Union{Nothing, Float64}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4104,10 +3549,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ElasticNetRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Bool\", \"Union{Bool, AbstractMatrix}\", \"Int64\", \"Bool\", \"Float64\", \"Bool\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4141,10 +3586,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.RandomForestClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Any\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4178,10 +3623,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LogisticCVClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" 
= "`(\"Union{Int64, AbstractVector{Float64}}\", \"Bool\", \"Any\", \"Bool\", \"String\", \"Any\", \"String\", \"Float64\", \"Int64\", \"Any\", \"Union{Nothing, Int64}\", \"Int64\", \"Bool\", \"Float64\", \"String\", \"Any\", \"Union{Nothing, AbstractVector{Float64}}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4215,10 +3660,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultiTaskElasticNetRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Union{Float64, Vector{Float64}}\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4252,10 +3697,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.ExtraTreesRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"String\", \"Union{Nothing, Int64}\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Union{Nothing, Float64, Int64, String}\", \"Union{Nothing, Int64}\", \"Float64\", \"Bool\", \"Bool\", \"Union{Nothing, Int64}\", \"Any\", \"Int64\", \"Bool\", \"Float64\", \"Union{Nothing, Float64, Int64}\", \"Union{Nothing, Dict, Vector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4289,10 +3734,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.LassoRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Bool, AbstractMatrix}\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Any\", \"String\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4326,10 +3771,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.MultinomialNBClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Bool\", \"Union{Nothing, AbstractVector}\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, 
nothing, nothing)`" @@ -4363,10 +3808,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.GradientBoostingRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Union{Float64, Int64}\", \"Union{Float64, Int64}\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\", \"Union{Nothing, Float64, Int64, String}\", \"Float64\", \"Int64\", \"Union{Nothing, Int64}\", \"Bool\", \"Float64\", \"Union{Nothing, Int64}\", \"Float64\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4400,10 +3845,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJScikitLearnInterface.SVMClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Float64\", \"Union{Function, String}\", \"Int64\", \"Union{Float64, String}\", \"Float64\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"String\", \"Any\")`" ":package_uuid" = "3646fa90-6ef7-5e7e-9f22-8aca16db6324" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4437,10 +3882,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.ABODDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Bool\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4474,10 +3919,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.DNNDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Real\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4511,10 +3956,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = 
"`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.LOFDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4548,10 +3993,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.KNNDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\", \"Symbol\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4585,10 +4030,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [OutlierDetectionNeighbors.COFDetector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Integer\", \"Distances.Metric\", \"Symbol\", \"Union{Bool, Symbol}\", \"Integer\", \"Bool\", \"Bool\")`" ":package_uuid" = "51249a0a-cb36-4849-8e04-30c7f8d311bb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4622,10 +4067,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [SIRUS.StableRulesClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Float64\")`" ":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4659,10 +4104,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [SIRUS.StableForestClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\")`" ":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" 
":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4696,10 +4141,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [SIRUS.StableRulesRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Float64\")`" ":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4733,10 +4178,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [SIRUS.StableForestRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\")`" ":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4770,10 +4215,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJIteration.IteratedModel] -":constructor" = "`IteratedModel`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, Dict{Any, <:Real}}\", \"Any\", \"Bool\", \"Bool\", \"Union{Nothing, Expr, Symbol}\", \"Bool\")`" ":package_uuid" = "614be32b-d00c-4edb-bd02-1eb411ab5e55" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4807,10 +4252,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`IteratedModel`" [MLJTSVDInterface.TSVDTransformer] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Union{Int64, Random.AbstractRNG}\")`" ":package_uuid" = "9449cd9e-2762-5aa3-a617-5413e99d722e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -4844,10 +4289,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [PartitionedLS.PartLS] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Union{Type{PartitionedLS.Alt}, Type{PartitionedLS.BnB}, Type{PartitionedLS.Opt}}\", \"Matrix{Int64}\", \"AbstractFloat\", 
\"AbstractFloat\", \"Int64\", \"Union{Nothing, Int64, Random.AbstractRNG}\")`" ":package_uuid" = "19f41c5e-8610-11e9-2f2a-0d67e7c5027f" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4881,10 +4326,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{AbstractVector{ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJLinearModels.QuantileRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4918,10 +4363,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.LogisticClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4955,10 +4400,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.MultinomialClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -4992,10 +4437,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.LADRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5029,10 +4474,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.RidgeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" 
":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -5066,10 +4511,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.RobustRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"MLJLinearModels.RobustRho\", \"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5103,10 +4548,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.ElasticNetRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5140,10 +4585,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.LinearRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -5177,10 +4622,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.LassoRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -5214,10 +4659,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJLinearModels.HuberRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Real\", \"Real\", \"Real\", \"Union{String, Symbol}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, MLJLinearModels.Solver}\")`" ":package_uuid" = "6ee0df7b-362f-4a72-a706-9e79364fb692" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5251,10 +4696,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = 
"`false`" +":constructor" = "`nothing`" [Maxnet.MaxnetBinaryClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Union{String, Vector{<:Maxnet.AbstractFeatureClass}}\", \"Float64\", \"Any\", \"Bool\", \"Integer\", \"Float64\", \"GLM.Link\", \"Bool\", \"Any\")`" ":package_uuid" = "81f79f80-22f2-4e41-ab86-00c11cf0f26f" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5288,10 +4733,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [ParallelKMeans.KMeans] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Symbol, ParallelKMeans.AbstractKMeansAlg}\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Union{Int64, Random.AbstractRNG}\", \"Any\", \"Any\")`" ":package_uuid" = "42b8e9d4-006b-409a-8472-7f34b3fb58af" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5325,10 +4770,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJNaiveBayesInterface.GaussianNBClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`()`" ":package_uuid" = "9bbee03b-0db5-5f46-924f-b5c9c21b8c60" ":hyperparameter_ranges" = "`()`" @@ -5362,10 +4807,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJNaiveBayesInterface.MultinomialNBClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\",)`" ":package_uuid" = "9bbee03b-0db5-5f46-924f-b5c9c21b8c60" ":hyperparameter_ranges" = "`(nothing,)`" @@ -5399,10 +4844,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Count}}, AbstractMatrix{<:ScientificTypesBase.Count}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJDecisionTreeInterface.AdaBoostStumpClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" ":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -5436,10 +4881,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJDecisionTreeInterface.DecisionTreeRegressor] -":constructor" = "`nothing`" +":is_wrapper" = 
"`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" ":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5473,10 +4918,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJDecisionTreeInterface.DecisionTreeClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Bool\", \"Float64\", \"Int64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" ":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5510,10 +4955,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJDecisionTreeInterface.RandomForestRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" ":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5547,10 +4992,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJDecisionTreeInterface.RandomForestClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Symbol\", \"Union{Integer, Random.AbstractRNG}\")`" ":package_uuid" = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5584,10 +5029,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJBase.Pipeline] -":constructor" = "`Pipeline`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"NamedTuple\", \"Bool\")`" ":package_uuid" = "unknown" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -5621,10 +5066,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" 
":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`Pipeline`" [MLJBase.Resampler] -":constructor" = "`MLJBase.Resampler`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict{<:Any, <:Real}}\", \"Any\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Int64\", \"Bool\", \"Bool\", \"Any\", \"Bool\")`" ":package_uuid" = "unknown" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5658,10 +5103,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`MLJBase.Resampler`" [MLJBase.Stack] -":constructor" = "`MLJBase.Stack`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"Vector{MLJModelInterface.Supervised}\", \"MLJModelInterface.Probabilistic\", \"Any\", \"Union{Nothing, AbstractVector}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" ":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -5695,10 +5140,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`MLJBase.Stack`" [MLJBase.TransformedTargetModel] -":constructor" = "`TransformedTargetModel`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Any\", \"Any\", \"Any\")`" ":package_uuid" = "a7f614a8-145f-11e9-1d2a-a57a1082229d" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -5732,10 +5177,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`TransformedTargetModel`" [MLJClusteringInterface.HierarchicalClustering] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Symbol\", \"Distances.SemiMetric\", \"Symbol\", \"Union{Nothing, Float64}\", \"Int64\")`" ":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -5769,10 +5214,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJClusteringInterface.DBSCAN] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Real\", \"Int64\", \"Int64\", \"Int64\")`" ":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -5806,10 +5251,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJClusteringInterface.KMeans] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Distances.SemiMetric\", \"Any\")`" ":package_uuid" = 
"aaaa29a8-35af-508c-8bc3-b662a17a0fe5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -5843,10 +5288,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJClusteringInterface.AffinityPropagation] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Float64\", \"Int64\", \"Float64\", \"Union{Nothing, Float64}\", \"Distances.SemiMetric\")`" ":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -5880,10 +5325,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJClusteringInterface.KMedoids] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Int64\", \"Distances.SemiMetric\", \"Any\")`" ":package_uuid" = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -5917,10 +5362,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJBalancing.BalancedBaggingClassifier] -":is_wrapper" = "`true`" +":constructor" = "`MLJBalancing.BalancedBaggingClassifier`" ":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Integer\", \"Union{Integer, Random.AbstractRNG}\")`" ":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -5954,10 +5399,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`MLJBalancing.BalancedBaggingClassifier`" +":is_wrapper" = "`true`" [MLJBalancing.BalancedModel] -":is_wrapper" = "`true`" +":constructor" = "`BalancedModel`" ":hyperparameter_types" = "`(\"Any\", \"MLJModelInterface.Probabilistic\")`" ":package_uuid" = "45f359ea-796d-4f51-95a5-deb1a414c586" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -5991,10 +5436,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`BalancedModel`" +":is_wrapper" = "`true`" [Imbalance.RandomOversampler] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -6028,10 +5473,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [Imbalance.SMOTENC] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Any\", \"AbstractString\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -6065,10 +5510,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" ":transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [Imbalance.TomekUndersampler] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -6102,10 +5547,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [Imbalance.ClusterUndersampler] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"AbstractString\", \"Any\", \"Integer\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -6139,10 +5584,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [Imbalance.SMOTE] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -6176,47 +5621,47 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" - -[Imbalance.SMOTEN] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" + +[Imbalance.RandomUndersampler] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\", 
\"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = "Imbalance.MLJ.SMOTEN" -":hyperparameters" = "`(:k, :ratios, :rng, :try_preserve_type)`" +":load_path" = "Imbalance.MLJ.RandomUndersampler" +":hyperparameters" = "`(:ratios, :rng, :try_preserve_type)`" ":is_pure_julia" = "`true`" -":human_name" = "smoten" +":human_name" = "random undersampler" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """Initiate a SMOTEN model with the given hyper-parameters.\n\n```\nSMOTEN\n```\n\nA model type for constructing a smoten, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSMOTEN = @load SMOTEN pkg=Imbalance\n```\n\nDo `model = SMOTEN()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SMOTEN(k=...)`.\n\n`SMOTEN` implements the SMOTEN algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O.Hall, W. P. Kegelmeyer, “SMOTEN: synthetic minority over-sampling technique,” Journal of artificial intelligence research, 321-357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = SMOTEN()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTEN algorithm. Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of integers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Finite`. 
That is, for table inputs each column should have either `OrderedFactor` or `Multiclass` as the element [scitype](https://juliaai.github.io/ScientificTypes.jl/).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling. depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTEN, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 0\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\njulia> ScientificTypes.schema(X).scitypes\n(Count, Count)\n\n# coerce to a finite scitype (multiclass or ordered factor)\nX = coerce(X, autotype(X, :few_to_finite))\n\n# load SMOTEN\nSMOTEN = @load SMOTEN pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTEN(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n""" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" +":docstring" = """Initiate a random undersampling model with the given hyper-parameters.\n\n```\nRandomUndersampler\n```\n\nA model type for constructing a random undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n```\n\nDo `model = RandomUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomUndersampler(ratios=...)`.\n\n`RandomUndersampler` implements naive undersampling by randomly removing existing observations. \n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed by model = RandomUndersampler()\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of undersampling to be done for each class\n\n * Can be a float and in this case each class will be undersampled to the size of the minority class times the float. 
By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses `MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of real numbers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling, depending on whether the input `X` is a matrix or table respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomUndersampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n                                class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y; ref=\"minority\")\n 1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n 2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n 0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load RandomUndersampler\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n\n# wrap the model in a machine\nundersampler = RandomUndersampler(ratios=Dict(0=>1.0, 1=> 1.0, 2=>1.0), \n                                rng=42)\nmach = machine(undersampler)\n\n# provide the data to transform (there is nothing to fit)\nX_under, y_under = transform(mach, X, y)\n \njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n```\n""" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" ":package_name" = "Imbalance" -":name" = "SMOTEN" +":name" = "RandomUndersampler" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":transform_scitype", ":clean!", ":transform"] +":implemented_methods" = [":transform_scitype", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" -":transform_scitype" = 
"`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" -":constructor" = "`nothing`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":is_wrapper" = "`false`" [Imbalance.ROSE] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"AbstractFloat\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -6250,47 +5695,47 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" - -[Imbalance.RandomUndersampler] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" + +[Imbalance.SMOTEN] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Tuple{}`" -":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" +":output_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = "Imbalance.MLJ.RandomUndersampler" -":hyperparameters" = "`(:ratios, :rng, :try_preserve_type)`" +":load_path" = "Imbalance.MLJ.SMOTEN" +":hyperparameters" = "`(:k, :ratios, :rng, :try_preserve_type)`" ":is_pure_julia" = "`true`" -":human_name" = "random undersampler" +":human_name" = "smoten" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """Initiate a random undersampling model with the given hyper-parameters.\n\n```\nRandomUndersampler\n```\n\nA model type for constructing a random undersampler, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n```\n\nDo `model = RandomUndersampler()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RandomUndersampler(ratios=...)`.\n\n`RandomUndersampler` implements naive undersampling by randomly removing existing observations. 
\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by mach = machine(model)\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`. \n\nFor default values of the hyper-parameters, model can be constructed by model = RandomUndersampler()\n\n# Hyperparameters\n\n * `ratios=1.0`: A parameter that controls the amount of undersampling to be done for each class\n\n * Can be a float and in this case each class will be undersampled to the size of the minority class times the float. By default, all classes are undersampled to the size of the minority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of real numbers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Union{Finite, Infinite}`. Elements in nominal columns should subtype `Finite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `OrderedFactor` or `Multiclass`) and elements in continuous columns should subtype `Infinite` (i.e., have [scitype](https://juliaai.github.io/ScientificTypes.jl/) `Count` or `Continuous`).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `X_under`: A matrix or table that includes the data after undersampling depending on whether the input `X` is a matrix or table respectively\n * `y_under`: An abstract vector of labels corresponding to `X_under`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using RandomUndersampler, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows, num_continuous_feats = 100, 5\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, rng=42) \n\njulia> Imbalance.checkbalance(y; ref=\"minority\")\n 1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n 2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (173.7%) \n 0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (252.6%) \n\n# load RandomUndersampler\nRandomUndersampler = @load RandomUndersampler pkg=Imbalance\n\n# wrap the model in a machine\nundersampler = RandomUndersampler(ratios=Dict(0=>1.0, 1=> 1.0, 2=>1.0), \n rng=42)\nmach = machine(undersampler)\n\n# provide the data to transform (there is nothing to fit)\nX_under, y_under = transform(mach, X, y)\n \njulia> Imbalance.checkbalance(y_under; ref=\"minority\")\n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (100.0%) \n```\n""" -":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" +":docstring" = """Initiate a SMOTEN model with the given hyper-parameters.\n\n```\nSMOTEN\n```\n\nA model type for constructing a smoten, based on [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the 
type can be imported using\n\n```\nSMOTEN = @load SMOTEN pkg=Imbalance\n```\n\nDo `model = SMOTEN()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SMOTEN(k=...)`.\n\n`SMOTEN` implements the SMOTEN algorithm to correct for class imbalance as in N. V. Chawla, K. W. Bowyer, L. O. Hall, W. P. Kegelmeyer, “SMOTE: synthetic minority over-sampling technique,” Journal of artificial intelligence research, 321-357, 2002.\n\n# Training data\n\nIn MLJ or MLJBase, wrap the model in a machine by\n\n```\nmach = machine(model)\n```\n\nThere is no need to provide any data here because the model is a static transformer.\n\nLikewise, there is no need to `fit!(mach)`.\n\nFor default values of the hyper-parameters, model can be constructed by\n\n```\nmodel = SMOTEN()\n```\n\n# Hyperparameters\n\n * `k=5`: Number of nearest neighbors to consider in the SMOTEN algorithm. Should be within the range `[1, n - 1]`, where `n` is the number of observations; otherwise set to the nearest of these two values.\n * `ratios=1.0`: A parameter that controls the amount of oversampling to be done for each class\n\n * Can be a float and in this case each class will be oversampled to the size of the majority class times the float. By default, all classes are oversampled to the size of the majority class\n * Can be a dictionary mapping each class label to the float ratio for that class\n\n * `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer` seed to be used with `Xoshiro` if the Julia `VERSION` supports it. Otherwise, uses `MersenneTwister`.\n\n# Transform Inputs\n\n * `X`: A matrix of integers or a table with element [scitypes](https://juliaai.github.io/ScientificTypes.jl/) that subtype `Finite`. That is, for table inputs each column should have either `OrderedFactor` or `Multiclass` as the element [scitype](https://juliaai.github.io/ScientificTypes.jl/).\n * `y`: An abstract vector of labels (e.g., strings) that correspond to the observations in `X`\n\n# Transform Outputs\n\n * `Xover`: A matrix or table that includes original data and the new observations due to oversampling, 
depending on whether the input `X` is a matrix or table respectively\n * `yover`: An abstract vector of labels corresponding to `Xover`\n\n# Operations\n\n * `transform(mach, X, y)`: resample the data `X` and `y` using SMOTEN, returning both the new and original observations\n\n# Example\n\n```julia\nusing MLJ\nusing ScientificTypes\nimport Imbalance\n\n# set probability of each class\nclass_probs = [0.5, 0.2, 0.3] \nnum_rows = 100\nnum_continuous_feats = 0\n# want two categorical features with three and two possible values respectively\nnum_vals_per_category = [3, 2]\n\n# generate a table and categorical vector accordingly\nX, y = Imbalance.generate_imbalanced_data(num_rows, num_continuous_feats; \n class_probs, num_vals_per_category, rng=42) \njulia> Imbalance.checkbalance(y)\n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 19 (39.6%) \n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 33 (68.8%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n\njulia> ScientificTypes.schema(X).scitypes\n(Count, Count)\n\n# coerce to a finite scitype (multiclass or ordered factor)\nX = coerce(X, autotype(X, :few_to_finite))\n\n# load SMOTEN\nSMOTEN = @load SMOTEN pkg=Imbalance\n\n# wrap the model in a machine\noversampler = SMOTEN(k=5, ratios=Dict(0=>1.0, 1=> 0.9, 2=>0.8), rng=42)\nmach = machine(oversampler)\n\n# provide the data to transform (there is nothing to fit)\nXover, yover = transform(mach, X, y)\n\njulia> Imbalance.checkbalance(yover)\n2: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 38 (79.2%) \n1: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 43 (89.6%) \n0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 48 (100.0%) \n```\n""" +":inverse_transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" ":package_url" = "https://github.com/JuliaAI/Imbalance.jl" ":package_name" = "Imbalance" -":name" = "RandomUndersampler" +":name" = "SMOTEN" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":transform_scitype", ":transform"] +":implemented_methods" = [":transform_scitype", ":clean!", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Infinite}}, AbstractVector}`" -":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" +":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" +":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractMatrix{<:ScientificTypesBase.Finite}}, AbstractVector}`" +":is_wrapper" = "`false`" [Imbalance.ENNUndersampler] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"AbstractString\", \"Any\", \"Bool\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = 
"`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6324,10 +5769,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [Imbalance.BorderlineSMOTE1] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"Integer\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6361,10 +5806,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" ":transform_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [Imbalance.RandomWalkOversampler] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\")`" ":package_uuid" = "c709b415-507b-45b7-9a3d-1767c89fde68" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -6398,10 +5843,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" ":transform_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}, AbstractVector}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJTuning.TunedModel] -":is_wrapper" = "`true`" +":constructor" = "`TunedModel`" ":hyperparameter_types" = "`(\"Union{MLJModelInterface.Probabilistic, MLJModelInterface.ProbabilisticSupervisedDetector, MLJModelInterface.ProbabilisticUnsupervisedDetector}\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, AbstractVector{<:Real}}\", \"Union{Nothing, AbstractDict}\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Int64\", \"Union{Nothing, Int64}\", \"ComputationalResources.AbstractResource\", \"ComputationalResources.AbstractResource\", \"Bool\", \"Bool\", \"Bool\", \"Any\")`" ":package_uuid" = "03970b2e-30c4-11ea-3135-d1576263f10f" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6435,10 +5880,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`TunedModel`" +":is_wrapper" = "`true`" [FeatureSelection.FeatureSelector] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Union{Function, Vector{Symbol}}\", \"Bool\")`" ":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -6472,10 +5917,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = 
"`ScientificTypesBase.Table`" ":transform_scitype" = "`ScientificTypesBase.Table`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [FeatureSelection.RecursiveFeatureElimination] -":constructor" = "`RecursiveFeatureElimination`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"MLJModelInterface.Supervised\", \"Float64\", \"Float64\")`" ":package_uuid" = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -6509,10 +5954,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`RecursiveFeatureElimination`" [EvoLinear.EvoSplineRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Symbol\", \"Any\", \"Any\", \"Union{Nothing, Dict}\", \"Any\", \"Symbol\")`" ":package_uuid" = "ab853011-1780-437f-b4b5-5de6f4777246" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6546,10 +5991,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [EvoLinear.EvoLinearRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Any\", \"Any\", \"Any\", \"Any\", \"Symbol\")`" ":package_uuid" = "ab853011-1780-437f-b4b5-5de6f4777246" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6583,10 +6028,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJText.TfidfTransformer] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" @@ -6620,10 +6065,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJText.CountTransformer] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\")`" ":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" ":hyperparameter_ranges" = "`(nothing, nothing)`" @@ -6657,10 +6102,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, 
AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJText.BM25Transformer] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\")`" ":package_uuid" = "7876af07-990d-54b4-ab0e-23690620f79a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" @@ -6694,10 +6139,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{AbstractVector{<:AbstractVector{ScientificTypesBase.Textual}}, AbstractVector{<:ScientificTypesBase.Multiset{<:NTuple{var\"_s1\", ScientificTypesBase.Textual} where var\"_s1\"}}, AbstractVector{<:ScientificTypesBase.Multiset{ScientificTypesBase.Textual}}}`" ":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [LightGBM.LGBMClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Vector{Int64}\", \"String\", \"Float64\", \"Vector{Float64}\", \"String\", \"Float64\", \"Float64\", \"Float64\", \"Vector{Float64}\", \"Vector{Float64}\", \"Float64\", \"Vector{Vector{Int64}}\", \"Int64\", \"Bool\", \"Int64\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"String\", \"String\", \"String\", \"Vector{Int64}\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Bool\", \"Any\", \"Float64\", \"Bool\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Vector{Float64}\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Bool\")`" ":package_uuid" = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6731,10 +6176,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [LightGBM.LGBMRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"String\", \"String\", \"Int64\", \"Float64\", \"Int64\", \"String\", \"Int64\", \"String\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Bool\", \"Bool\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Int64\", \"Int64\", \"Vector{Int64}\", \"String\", \"Float64\", \"Vector{Float64}\", \"String\", \"Float64\", \"Float64\", \"Float64\", \"Vector{Float64}\", \"Vector{Float64}\", \"Float64\", \"Vector{Vector{Int64}}\", \"Int64\", \"Bool\", \"Int64\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"String\", \"String\", \"String\", \"Vector{Int64}\", \"String\", \"Bool\", \"Int64\", \"Int64\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Vector{String}\", \"Int64\", \"Bool\", \"Vector{Int64}\", \"Int64\", \"Int64\", \"Int64\", \"String\", \"String\", \"Int64\", \"Int64\", \"Bool\", \"Int64\", \"Bool\")`" ":package_uuid" = "7acf609c-83a4-11e9-1ffb-b912bcd3b04a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6768,10 +6213,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [LaplaceRedux.LaplaceClassifier] -":is_wrapper" = "`true`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Union{Nothing, Flux.Chain}\", \"Any\", \"Any\", \"Integer\", \"Integer\", \"Symbol\", \"Any\", \"Union{String, Symbol, LaplaceRedux.HessianStructure}\", \"Symbol\", \"Float64\", \"Float64\", \"Union{Nothing, LinearAlgebra.UniformScaling, AbstractMatrix}\", \"Int64\", \"Symbol\")`" ":package_uuid" = "c52c1a26-f7c5-402b-80be-ba1e638ad478" ":hyperparameter_ranges" = 
"`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6805,10 +6250,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:ScientificTypesBase.Continuous}}}, AbstractMatrix{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`true`" [LaplaceRedux.LaplaceRegressor] -":is_wrapper" = "`true`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Union{Nothing, Flux.Chain}\", \"Any\", \"Any\", \"Integer\", \"Integer\", \"Symbol\", \"Any\", \"Union{String, Symbol, LaplaceRedux.HessianStructure}\", \"Symbol\", \"Float64\", \"Float64\", \"Union{Nothing, LinearAlgebra.UniformScaling, AbstractMatrix}\", \"Int64\")`" ":package_uuid" = "c52c1a26-f7c5-402b-80be-ba1e638ad478" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6842,10 +6287,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:ScientificTypesBase.Infinite}}}, AbstractMatrix{<:Union{ScientificTypesBase.Infinite, ScientificTypesBase.Finite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`true`" [SymbolicRegression.MultitargetSRRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" ":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6879,10 +6324,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [SymbolicRegression.SRRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.Traits.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Integer\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Union{Nothing, Integer}\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Union{Nothing, Bool}\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\", \"Union{Nothing, Integer}\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Union{Nothing, Integer}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" ":package_uuid" = "8254be44-1295-4e6a-a16d-46603ac705cb" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6916,10 +6361,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [EvoTrees.EvoTreeClassifier] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", 
\"Int64\", \"Float64\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" ":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6953,10 +6398,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [EvoTrees.EvoTreeGaussian] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" ":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -6990,10 +6435,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [EvoTrees.EvoTreeMLE] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" ":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7027,10 +6472,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [EvoTrees.EvoTreeRegressor] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" ":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7064,10 +6509,10 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [EvoTrees.EvoTreeCount] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" ":package_uuid" = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -7101,717 +6546,828 @@ ":reports_feature_importances" = "`true`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJTestInterface] +[MLJModels.DeterministicConstantRegressor] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "MLJModels" +":package_license" = "MIT" +":load_path" = "MLJModels.DeterministicConstantRegressor" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nDeterministicConstantRegressor\n```\n\nA model type for constructing a deterministic constant regressor, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantRegressor = @load DeterministicConstantRegressor pkg=MLJModels\n```\n\nDo `model = DeterministicConstantRegressor()` to construct an instance with default hyper-parameters. 
""" +":name" = "DeterministicConstantRegressor" +":human_name" = "deterministic constant regressor" +":tags" = [] +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [":fit", ":predict"] +":hyperparameters" = "`()`" +":hyperparameter_types" = "`()`" +":hyperparameter_ranges" = "`()`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + [MLJModels.ConstantClassifier] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "MLJModels" +":package_license" = "MIT" +":load_path" = "MLJModels.ConstantClassifier" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":is_wrapper" = "`false`" +":supports_weights" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nConstantClassifier\n```\n\nThis \"dummy\" probabilistic predictor always returns the same distribution, irrespective of the provided input pattern. The distribution `d` returned is the `UnivariateFinite` distribution based on frequency of classes observed in the training target data. So, `pdf(d, level)` is the number of times the training target takes on the value `level`. Use `predict_mode` instead of `predict` to obtain the training target mode instead. For more on the `UnivariateFinite` type, see the CategoricalDistributions.jl package.\n\nAlmost any reasonable model is expected to outperform `ConstantClassifier`, which is used almost exclusively for testing and establishing performance baselines.\n\nIn MLJ (or MLJModels) do `model = ConstantClassifier()` to construct an instance.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`)\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nNone.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` (which for this model are ignored). 
Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the mode of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `target_distribution`: The distribution fit to the supplied target data.\n\n# Examples\n\n```julia\nusing MLJ\n\nclf = ConstantClassifier()\n\nX, y = @load_crabs # a table and a categorical vector\nmach = machine(clf, X, y) |> fit!\n\nfitted_params(mach)\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\n# probabilistic predictions:\nyhat = predict(mach, Xnew)\nyhat[1]\n\n# raw probabilities:\npdf.(yhat, \"B\")\n\n# probability matrix:\nL = levels(y)\npdf(yhat, L)\n\n# point predictions:\npredict_mode(mach, Xnew)\n```\n\nSee also [`ConstantRegressor`](@ref)\n""" +":name" = "ConstantClassifier" +":human_name" = "constant classifier" +":tags" = [] +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [":fit", ":fitted_params", ":predict"] +":hyperparameters" = "`()`" ":hyperparameter_types" = "`()`" +":hyperparameter_ranges" = "`()`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJModels.ConstantRegressor] +":input_scitype" = "`ScientificTypesBase.Table`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "MLJModels" +":package_license" = "MIT" +":load_path" = "MLJModels.ConstantRegressor" ":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`()`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nConstantRegressor\n```\n\nThis \"dummy\" probabilistic predictor always returns the same distribution, irrespective of the provided input pattern. The distribution returned is the one of the type specified that best fits the training target data. Use `predict_mean` or `predict_median` to predict the mean or median values instead. 
If not specified, a normal distribution is fit.\n\nAlmost any reasonable model is expected to outperform `ConstantRegressor` which is used almost exclusively for testing and establishing performance baselines.\n\nIn MLJ (or MLJModels) do `model = ConstantRegressor()` or `model = ConstantRegressor(distribution=...)` to construct a model instance.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`)\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `distribution_type=Distributions.Normal`: The distribution to be fit to the target data. Must be a subtype of `Distributions.ContinuousUnivariateDistribution`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` (which for this model are ignored). Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: Return instead the means of the probabilistic predictions returned above.\n * `predict_median(mach, Xnew)`: Return instead the medians of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `target_distribution`: The distribution fit to the supplied target data.\n\n# Examples\n\n```julia\nusing MLJ\n\nX, y = make_regression(10, 2) # synthetic data: a table and vector\nregressor = ConstantRegressor()\nmach = machine(regressor, X, y) |> fit!\n\nfitted_params(mach)\n\nXnew, _ = make_regression(3, 2)\npredict(mach, Xnew)\npredict_mean(mach, Xnew)\n\n```\n\nSee also [`ConstantClassifier`](@ref)\n""" +":name" = "ConstantRegressor" +":human_name" = "constant regressor" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJModels.ConstantClassifier" -":hyperparameters" = "`()`" -":is_pure_julia" = "`true`" -":human_name" = "constant classifier" ":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [":fitted_params", ":predict"] +":hyperparameters" = "`(:distribution_type,)`" +":hyperparameter_types" = "`(\"Type{D} where D<:Distributions.Sampleable\",)`" +":hyperparameter_ranges" = "`(nothing,)`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nConstantClassifier\n```\n\nThis \"dummy\" probabilistic predictor always returns the same distribution, irrespective of the provided input pattern. The distribution `d` returned is the `UnivariateFinite` distribution based on frequency of classes observed in the training target data. So, `pdf(d, level)` is the number of times the training target takes on the value `level`. Use `predict_mode` instead of `predict` to obtain the training target mode instead. 
For more on the `UnivariateFinite` type, see the CategoricalDistributions.jl package.\n\nAlmost any reasonable model is expected to outperform `ConstantClassifier`, which is used almost exclusively for testing and establishing performance baselines.\n\nIn MLJ (or MLJModels) do `model = ConstantClassifier()` to construct an instance.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`)\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Finite`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nNone.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` (which for this model are ignored). Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the mode of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `target_distribution`: The distribution fit to the supplied target data.\n\n# Examples\n\n```julia\nusing MLJ\n\nclf = ConstantClassifier()\n\nX, y = @load_crabs # a table and a categorical vector\nmach = machine(clf, X, y) |> fit!\n\nfitted_params(mach)\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\n# probabilistic predictions:\nyhat = predict(mach, Xnew)\nyhat[1]\n\n# raw probabilities:\npdf.(yhat, \"B\")\n\n# probability matrix:\nL = levels(y)\npdf(yhat, L)\n\n# point predictions:\npredict_mode(mach, Xnew)\n```\n\nSee also [`ConstantRegressor`](@ref)\n""" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJModels.BinaryThresholdPredictor] +":input_scitype" = "`ScientificTypesBase.Unknown`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "ConstantClassifier" ":target_in_fit" = "`true`" +":is_pure_julia" = "`false`" +":package_name" = "MLJModels" +":package_license" = "unknown" +":load_path" = "MLJModels.BinaryThresholdPredictor" +":package_uuid" = "" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +":is_wrapper" = "`true`" +":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":predict"] -":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":docstring" = """```\nBinaryThresholdPredictor(model; threshold=0.5)\n```\n\nWrap the `Probabilistic` model, `model`, assumed to support binary classification, as a `Deterministic` model, by applying the specified `threshold` to the positive class probability. 
In addition to conventional supervised classifiers, it can also be applied to outlier detection models that predict normalized scores - in the form of appropriate `UnivariateFinite` distributions - that is, models that subtype `AbstractProbabilisticUnsupervisedDetector` or `AbstractProbabilisticSupervisedDetector`.\n\nBy convention the positive class is the second class returned by `levels(y)`, where `y` is the target.\n\nIf `threshold=0.5` then calling `predict` on the wrapped model is equivalent to calling `predict_mode` on the atomic model.\n\n# Example\n\nBelow is an application to the well-known Pima Indian diabetes dataset, including optimization of the `threshold` parameter, with balanced accuracy as the objective. The target class distribution is 268 positives to 500 negatives.\n\nLoading the data:\n\n```julia\nusing MLJ, Random\nrng = Xoshiro(123)\n\ndiabetes = OpenML.load(43582)\noutcome, X = unpack(diabetes, ==(:Outcome), rng=rng);\ny = coerce(Int.(outcome), OrderedFactor);\n```\n\nChoosing a probabilistic classifier:\n\n```julia\nEvoTreesClassifier = @load EvoTreesClassifier\nprob_predictor = EvoTreesClassifier()\n```\n\nWrapping in `BinaryThresholdPredictor` to get a deterministic classifier with `threshold` as a new hyperparameter:\n\n```julia\npoint_predictor = BinaryThresholdPredictor(prob_predictor, threshold=0.6)\nmach = machine(point_predictor, X, y) |> fit!\npredict(mach, X)[1:3] # [0, 0, 0]\n```\n\nEstimating performance:\n\n```julia\nbalanced = BalancedAccuracy(adjusted=true)\ne = evaluate!(mach, resampling=CV(nfolds=6), measures=[balanced, accuracy])\ne.measurement[1] # 0.405 ± 0.089\n```\n\nWrapping in tuning strategy to learn `threshold` that maximizes balanced accuracy:\n\n```julia\nr = range(point_predictor, :threshold, lower=0.1, upper=0.9)\ntuned_point_predictor = TunedModel(\n point_predictor,\n tuning=RandomSearch(rng=rng),\n resampling=CV(nfolds=6),\n range = r,\n measure=balanced,\n n=30,\n)\nmach2 = machine(tuned_point_predictor, X, y) |> fit!\noptimized_point_predictor = report(mach2).best_model\noptimized_point_predictor.threshold # 0.260\npredict(mach2, X)[1:3] # [1, 1, 0]\n```\n\nEstimating the performance of the auto-thresholding model (nested resampling here):\n\n```julia\ne = evaluate!(mach2, resampling=CV(nfolds=6), measure=[balanced, accuracy])\ne.measurement[1] # 0.477 ± 0.110\n```\n"""
+":name" = "BinaryThresholdPredictor"
+":human_name" = "binary threshold predictor"
+":tags" = []
+":is_supervised" = "`true`"
+":prediction_type" = ":deterministic"
+":abstract_type" = "`MLJModelInterface.Deterministic`"
+":implemented_methods" = []
+":hyperparameters" = "`(:model, :threshold)`"
+":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Float64\")`"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
+":iteration_parameter" = "`nothing`"
+":supports_training_losses" = "`false`"
+":reports_feature_importances" = "`false`"
+":deep_properties" = "`()`"
+":reporting_operations" = "`()`"
+":constructor" = "`MLJModels.BinaryThresholdPredictor`"
+
+[MLJModels.DeterministicConstantClassifier]
+":input_scitype" = "`ScientificTypesBase.Table`"
+":output_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
+":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}}`"
+":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`"
":transform_scitype" = "`ScientificTypesBase.Unknown`"
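The `:is_wrapper`/`:constructor` pairs reshuffled throughout this hunk, like those in the `BinaryThresholdPredictor` entry just above, are ordinary model traits and become queryable from MLJ once a registry containing this update ships. A minimal sketch of such a query (an assumption-laden illustration, not part of the patch: it presumes an MLJ installation whose model registry includes this update; the commented values are simply those recorded in the entry above):

```julia
using MLJ  # assumes the installed model registry includes this update

# registry metadata is available without loading the model's own package:
meta = info("BinaryThresholdPredictor", pkg="MLJModels")

meta.is_wrapper       # true: the model wraps an atomic probabilistic model
meta.constructor      # MLJModels.BinaryThresholdPredictor (non-`nothing` for wrappers)
meta.prediction_type  # :deterministic
```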
-":constructor" = "`nothing`" - -[MLJModels.Standardizer] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Function, AbstractVector{Symbol}}\", \"Bool\", \"Bool\", \"Bool\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJModels.Standardizer" -":hyperparameters" = "`(:features, :ignore, :ordered_factor, :count)`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" ":is_pure_julia" = "`true`" -":human_name" = "standardizer" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStandardizer = @load Standardizer pkg=MLJModels\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. 
For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [-20.0, -30.0, -40.0],\n nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names │ scitypes │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous │\n│ ordinal4 │ Continuous │\n│ nominal │ Multiclass{3} │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n""" -":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" ":package_name" = "MLJModels" -":name" = "Standardizer" -":target_in_fit" = "`false`" +":package_license" = "MIT" +":load_path" = "MLJModels.DeterministicConstantClassifier" +":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" +":package_url" = "https://github.com/JuliaAI/MLJModels.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] -":deep_properties" = "`()`" 
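Scitype declarations like the `fit_data_scitype` and `input_scitype` strings recorded in these entries are what MLJ's model search consults when matching models to data. A minimal sketch of that workflow, again only illustrative and assuming a plain MLJ installation (the exact list returned depends on which interface packages are registered):

```julia
using MLJ

# a toy table with mixed scitypes and a Multiclass target:
X = (height = [1.85, 1.67, 1.5],
     grade  = coerce(["A", "B", "A"], OrderedFactor))
y = coerce(["yes", "no", "yes"], Multiclass)

# models whose declared input/target scitypes accept (X, y):
for m in models(matching(X, y))
    println(m.name, " (", m.package_name, ")")
end
```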
-":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":docstring" = """```\nDeterministicConstantClassifier\n```\n\nA model type for constructing a deterministic constant classifier, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantClassifier = @load DeterministicConstantClassifier pkg=MLJModels\n```\n\nDo `model = DeterministicConstantClassifier()` to construct an instance with default hyper-parameters. """ +":name" = "DeterministicConstantClassifier" +":human_name" = "deterministic constant classifier" +":tags" = [] +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [":fit", ":predict"] +":hyperparameters" = "`()`" +":hyperparameter_types" = "`()`" +":hyperparameter_ranges" = "`()`" +":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJModels.DeterministicConstantClassifier] -":is_wrapper" = "`false`" -":hyperparameter_types" = "`()`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`()`" +[MLJGLMInterface.LinearBinaryClassifier] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"GLM.Link01\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" +":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Finite}}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJModels.DeterministicConstantClassifier" -":hyperparameters" = "`()`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJGLMInterface.LinearBinaryClassifier" +":hyperparameters" = "`(:fit_intercept, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" ":is_pure_julia" = "`true`" -":human_name" = "deterministic constant classifier" +":human_name" = "linear binary classifier" ":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nDeterministicConstantClassifier\n```\n\nA model type for constructing a deterministic constant classifier, based 
on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantClassifier = @load DeterministicConstantClassifier pkg=MLJModels\n```\n\nDo `model = DeterministicConstantClassifier()` to construct an instance with default hyper-parameters. """
+":docstring" = """```\nLinearBinaryClassifier\n```\n\nA model type for constructing a linear binary classifier, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\n```\n\nDo `model = LinearBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearBinaryClassifier(fit_intercept=...)`.\n\n`LinearBinaryClassifier` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a binary target variable, with a user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor(2)` or `<:Multiclass(2)`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `link=GLM.LogitLink`: The function which links the linear prediction function to the probability of a particular outcome or class. This must have type `GLM.Link01`. Options include `GLM.LogitLink()`, `GLM.ProbitLink()`, `GLM.CloglogLink()`, `GLM.CauchitLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features used during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport GLM # namespace must be available\n\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\nclf = LinearBinaryClassifier(fit_intercept=false, link=GLM.ProbitLink())\n\nX, y = @load_crabs\n\nmach = machine(clf, X, y) |> fit!\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\nyhat = predict(mach, Xnew) # probabilistic predictions\npdf(yhat, levels(y)) # probability matrix\np_B = pdf.(yhat, \"B\")\nclass_labels = predict_mode(mach, Xnew)\n\nfitted_params(mach).features\nfitted_params(mach).coef\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearCountRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "DeterministicConstantClassifier" +":package_url" = "https://github.com/JuliaStats/GLM.jl" +":package_name" = "GLM" +":name" = "LinearBinaryClassifier" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":predict"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJModels.UnivariateTimeTypeToContinuous] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Nothing, Dates.TimeType}\", \"Dates.Period\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[MLJGLMInterface.LinearCountRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Distributions.Distribution\", \"GLM.Link\", \"Union{Nothing, Symbol}\", 
\"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" +":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJModels.UnivariateTimeTypeToContinuous" -":hyperparameters" = "`(:zero_time, :step)`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJGLMInterface.LinearCountRegressor" +":hyperparameters" = "`(:fit_intercept, :distribution, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" ":is_pure_julia" = "`true`" -":human_name" = "single variable transformer that creates continuous representations of temporally typed data" -":is_supervised" = "`false`" +":human_name" = "linear count regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=MLJModels\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. 
If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "UnivariateTimeTypeToContinuous" -":target_in_fit" = "`false`" +":docstring" = """```\nLinearCountRegressor\n```\n\nA model type for constructing a linear count regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearCountRegressor = @load LinearCountRegressor pkg=GLM\n```\n\nDo `model = LinearCountRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearCountRegressor(fit_intercept=...)`.\n\n`LinearCountRegressor` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a `Count` target variable (non-negative, unbounded integer) with user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Count`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `distribution=Distributions.Poisson()`: The distribution which the residuals/errors of the model should fit.\n * `link=GLM.LogLink()`: The function which links the linear prediction function to the probability of a particular outcome or class. This should be one of the following: `GLM.IdentityLink()`, `GLM.InverseLink()`, `GLM.InverseSquareLink()`, `GLM.LogLink()`, `GLM.SqrtLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). 
This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. 
Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport MLJ.Distributions.Poisson\n\n# Generate some data whose target y looks Poisson when conditioned on\n# X:\nN = 10_000\nw = [1.0, -2.0, 3.0]\nmu(x) = exp(w'x) # mean for a log link function\nXmat = rand(N, 3)\nX = MLJ.table(Xmat)\ny = map(1:N) do i\n x = Xmat[i, :]\n rand(Poisson(mu(x)))\nend;\n\nCountRegressor = @load LinearCountRegressor pkg=GLM\nmodel = CountRegressor(fit_intercept=false)\nmach = machine(model, X, y)\nfit!(mach)\n\nXnew = MLJ.table(rand(3, 3))\nyhat = predict(mach, Xnew)\nyhat_point = predict_mean(mach, Xnew)\n\n# get coefficients approximating `w`:\njulia> fitted_params(mach).coef\n3-element Vector{Float64}:\n 0.9969008753103842\n -2.0255901752504775\n 3.014407534033522\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/GLM.jl" +":package_name" = "GLM" +":name" = "LinearCountRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Count}}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Count}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" - -[MLJModels.OneHotEncoder] +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Bool\", \"Bool\", \"Bool\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" + +[MLJGLMInterface.LinearRegressor] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Symbol}\", \"Union{Nothing, AbstractVector{Symbol}}\")`" +":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" 
= "MLJModels.OneHotEncoder" -":hyperparameters" = "`(:features, :drop_last, :ordered_factor, :ignore)`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJGLMInterface.LinearRegressor" +":hyperparameters" = "`(:fit_intercept, :dropcollinear, :offsetcol, :report_keys)`" ":is_pure_julia" = "`true`" -":human_name" = "one-hot encoder" -":is_supervised" = "`false`" +":human_name" = "linear regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneHotEncoder = @load OneHotEncoder pkg=MLJModels\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (column names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=true`: whether to drop the column corresponding to the final class of encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade__A │ Continuous │\n│ grade__B │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Count │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "OneHotEncoder" -":target_in_fit" = "`false`" +":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=GLM\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(fit_intercept=...)`.\n\n`LinearRegressor` assumes the target is a continuous variable whose conditional distribution is normal with constant variance, and whose expected value is a linear combination of the features (identity link function). Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n * `w`: is a vector of `Real` per-observation weights\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. 
the data is expected to be centered)\n * `dropcollinear=false`: Whether to drop features in the training data to ensure linear independence. If `true`, only the first of each set of linearly-dependent features is used. The coefficient for redundant linearly dependent features is `0.0` and all associated statistics are set to `NaN`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above.\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nWhen all keys are enabled in `report_keys`, the following fields are available in `report(mach)`:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares.\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. 
Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nLinearRegressor = @load LinearRegressor pkg=GLM\nglm = LinearRegressor()\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(glm, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\nyhat_point = predict_mean(mach, Xnew) # new predictions\n\nfitted_params(mach).features\nfitted_params(mach).coef # x1, x2, intercept\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearCountRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/GLM.jl" +":package_name" = "GLM" +":name" = "LinearRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":OneHotEncoder"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`false`" +":supports_weights" = "`true`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":constructor" = "`nothing`" - -[MLJModels.ContinuousEncoder] +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[OneRule.OneRuleClassifier] +":constructor" = "`nothing`" +":hyperparameter_types" = "`()`" +":package_uuid" = "90484964-6d6a-4979-af09-8657dbed84ff" +":hyperparameter_ranges" = "`()`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "MLJModels.ContinuousEncoder" -":hyperparameters" = "`(:drop_last, :one_hot_ordered_factors)`" +":prediction_type" = ":deterministic" +":load_path" = "OneRule.OneRuleClassifier" +":hyperparameters" = "`()`" ":is_pure_julia" = "`true`" -":human_name" = "continuous encoder" -":is_supervised" = "`false`" +":human_name" = "one rule classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load 
ContinuousEncoder pkg=MLJModels\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `ordered_factors=false` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping columns) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but two just features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n height=[1.85, 1.67, 1.5, 1.67],\n n_devices=[3, 2, 4, 3],\n comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names │ scitypes │\n├───────────┼──────────────────┤\n│ name │ Multiclass{4} │\n│ grade │ OrderedFactor{3} │\n│ height │ Continuous │\n│ n_devices │ Count │\n│ comments │ Textual │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names │ scitypes │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John │ Continuous │\n│ name__Lee │ Continuous │\n│ grade │ Continuous │\n│ height │ Continuous │\n│ n_devices │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee 
also [`OneHotEncoder`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "ContinuousEncoder" -":target_in_fit" = "`false`" +":docstring" = """```\nOneRuleClassifier\n```\n\nA model type for constructing a one rule classifier, based on [OneRule.jl](https://github.com/roland-KA/OneRule.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneRuleClassifier = @load OneRuleClassifier pkg=OneRule\n```\n\nDo `model = OneRuleClassifier()` to construct an instance with default hyper-parameters. \n\n`OneRuleClassifier` implements the OneRule method for classification by Robert Holte (\"Very simple classification rules perform well on most commonly used datasets\" in: Machine Learning 11.1 (1993), pp. 63-90). \n\n```\nFor more information see:\n\n- Witten, Ian H., Eibe Frank, and Mark A. Hall. \n Data Mining: Practical Machine Learning Tools and Techniques, Third Edition. \n Morgan Kaufmann, 2017, pp. 93-96.\n- [Machine Learning - (One|Simple) Rule](https://datacadamia.com/data_mining/one_rule)\n- [OneRClassifier - One Rule for Classification](http://rasbt.github.io/mlxtend/user_guide/classifier/OneRClassifier/)\n```\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Multiclass`, `OrderedFactor`, or `<:Finite`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nThis classifier has no hyper-parameters.\n\n# Operations\n\n * `predict(mach, Xnew)`: return (deterministic) predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree (a `OneTree`) returned by the core OneRule.jl algorithm\n * `all_classes`: all classes (i.e. 
levels) of the target (used also internally to transfer `levels`-information to `predict`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `tree`: The `OneTree` created based on the training data\n * `nrules`: The number of rules `tree` contains\n * `error_rate`: fraction of wrongly classified instances\n * `error_count`: number of wrongly classified instances\n * `classes_seen`: list of target classes actually observed in training\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\n\nORClassifier = @load OneRuleClassifier pkg=OneRule\n\norc = ORClassifier()\n\noutlook = [\"sunny\", \"sunny\", \"overcast\", \"rainy\", \"rainy\", \"rainy\", \"overcast\", \"sunny\", \"sunny\", \"rainy\", \"sunny\", \"overcast\", \"overcast\", \"rainy\"]\ntemperature = [\"hot\", \"hot\", \"hot\", \"mild\", \"cool\", \"cool\", \"cool\", \"mild\", \"cool\", \"mild\", \"mild\", \"mild\", \"hot\", \"mild\"]\nhumidity = [\"high\", \"high\", \"high\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"high\"]\nwindy = [\"false\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"true\"]\n\nweather_data = (outlook = outlook, temperature = temperature, humidity = humidity, windy = windy)\nplay_data = [\"no\", \"no\", \"yes\", \"yes\", \"yes\", \"no\", \"yes\", \"no\", \"yes\", \"yes\", \"yes\", \"yes\", \"yes\", \"no\"]\n\nweather = coerce(weather_data, Textual => Multiclass)\nplay = coerce(play_data, Multiclass)\n\nmach = machine(orc, weather, play)\nfit!(mach)\n\nyhat = MLJ.predict(mach, weather) # in a real context 'new' `weather` data would be used\none_tree = fitted_params(mach).tree\nreport(mach).error_rate\n```\n\nSee also [OneRule.jl](https://github.com/roland-KA/OneRule.jl).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/roland-KA/OneRule.jl" +":package_name" = "OneRule" +":name" = "OneRuleClassifier" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":ContinuousEncoder"] +":implemented_methods" = [":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" - -[MLJModels.UnivariateBoxCoxTransformer] +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Bool\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[OutlierDetectionPython.MCDDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = 
"`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJModels.UnivariateBoxCoxTransformer" -":hyperparameters" = "`(:n, :shift)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable Box-Cox transformer" +":load_path" = "OutlierDetectionPython.MCDDetector" +":hyperparameters" = "`(:store_precision, :assume_centered, :support_fraction, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "mcd detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=MLJModels\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. 
Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `z` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "UnivariateBoxCoxTransformer" +":docstring" = """```\nMCDDetector(store_precision = true,\n assume_centered = false,\n support_fraction = nothing,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "MCDDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateBoxCoxTransformer"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" - -[MLJModels.InteractionTransformer] +":input_scitype" = 
"`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Vector{Symbol}}\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[OutlierDetectionPython.COPODDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\",)`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Static`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJModels.InteractionTransformer" -":hyperparameters" = "`(:order, :features)`" -":is_pure_julia" = "`true`" -":human_name" = "interaction transformer" +":load_path" = "OutlierDetectionPython.COPODDetector" +":hyperparameters" = "`(:n_jobs,)`" +":is_pure_julia" = "`false`" +":human_name" = "copod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load InteractionTransformer pkg=MLJModels\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. 
If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interations generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" -":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "InteractionTransformer" +":docstring" = """```\nCOPODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "COPODDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":transform"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":constructor" = "`nothing`" - -[MLJModels.ConstantRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Type{D} where D<:Distributions.Sampleable\",)`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing,)`" + +[OutlierDetectionPython.HBOSDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = 
"`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJModels.ConstantRegressor" -":hyperparameters" = "`(:distribution_type,)`" -":is_pure_julia" = "`true`" -":human_name" = "constant regressor" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.HBOSDetector" +":hyperparameters" = "`(:n_bins, :alpha, :tol)`" +":is_pure_julia" = "`false`" +":human_name" = "hbos detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nConstantRegressor\n```\n\nThis \"dummy\" probabilistic predictor always returns the same distribution, irrespective of the provided input pattern. The distribution returned is the one of the type specified that best fits the training target data. Use `predict_mean` or `predict_median` to predict the mean or median values instead. If not specified, a normal distribution is fit.\n\nAlmost any reasonable model is expected to outperform `ConstantRegressor` which is used almost exclusively for testing and establishing performance baselines.\n\nIn MLJ (or MLJModels) do `model = ConstantRegressor()` or `model = ConstantRegressor(distribution=...)` to construct a model instance.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`)\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `distribution_type=Distributions.Normal`: The distribution to be fit to the target data. Must be a subtype of `Distributions.ContinuousUnivariateDistribution`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` (which for this model are ignored). 
Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: Return instead the means of the probabilistic predictions returned above.\n * `predict_median(mach, Xnew)`: Return instead the medians of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `target_distribution`: The distribution fit to the supplied target data.\n\n# Examples\n\n```julia\nusing MLJ\n\nX, y = make_regression(10, 2) # synthetic data: a table and vector\nregressor = ConstantRegressor()\nmach = machine(regressor, X, y) |> fit!\n\nfitted_params(mach)\n\nXnew, _ = make_regression(3, 2)\npredict(mach, Xnew)\npredict_mean(mach, Xnew)\n\n```\n\nSee also [`ConstantClassifier`](@ref)\n""" +":docstring" = """```\nHBOSDetector(n_bins = 10,\n alpha = 0.1,\n tol = 0.5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "ConstantRegressor" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "HBOSDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJModels.UnivariateDiscretizer] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\",)`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing,)`" + +[OutlierDetectionPython.IForestDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\", \"Integer\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJModels.UnivariateDiscretizer" -":hyperparameters" = "`(:n_classes,)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable discretizer" +":load_path" = "OutlierDetectionPython.IForestDetector" +":hyperparameters" = "`(:n_estimators, :max_samples, :max_features, :bootstrap, :random_state, :verbose, :n_jobs)`" +":is_pure_julia" = "`false`" +":human_name" = "i forest detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=MLJModels\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitesult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "UnivariateDiscretizer" +":docstring" = """```\nIForestDetector(n_estimators = 100,\n max_samples = \"auto\",\n max_features = 1.0,\n bootstrap = false,\n random_state = nothing,\n verbose = 0,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "IForestDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" -":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" -":constructor" = "`nothing`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":is_wrapper" = "`false`" -[MLJModels.BinaryThresholdPredictor] -":is_wrapper" = "`true`" -":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Float64\")`" -":package_uuid" = "" -":hyperparameter_ranges" = "`(nothing, nothing)`" +[OutlierDetectionPython.SOSDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"String\", \"Real\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJModels.BinaryThresholdPredictor" -":hyperparameters" = "`(:model, :threshold)`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":package_license" = "MIT" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.SOSDetector" +":hyperparameters" = "`(:perplexity, :metric, :eps)`" ":is_pure_julia" = "`false`" -":human_name" = "binary 
threshold predictor" -":is_supervised" = "`true`" +":human_name" = "sos detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nBinaryThresholdPredictor(model; threshold=0.5)\n```\n\nWrap the `Probabilistic` model, `model`, assumed to support binary classification, as a `Deterministic` model, by applying the specified `threshold` to the positive class probability. In addition to conventional supervised classifiers, it can also be applied to outlier detection models that predict normalized scores - in the form of appropriate `UnivariateFinite` distributions - that is, models that subtype `AbstractProbabilisticUnsupervisedDetector` or `AbstractProbabilisticSupervisedDetector`.\n\nBy convention the positive class is the second class returned by `levels(y)`, where `y` is the target.\n\nIf `threshold=0.5` then calling `predict` on the wrapped model is equivalent to calling `predict_mode` on the atomic model.\n\n# Example\n\nBelow is an application to the well-known Pima Indian diabetes dataset, including optimization of the `threshold` parameter, with a high balanced accuracy the objective. The target class distribution is 500 positives to 268 negatives.\n\nLoading the data:\n\n```julia\nusing MLJ, Random\nrng = Xoshiro(123)\n\ndiabetes = OpenML.load(43582)\noutcome, X = unpack(diabetes, ==(:Outcome), rng=rng);\ny = coerce(Int.(outcome), OrderedFactor);\n```\n\nChoosing a probabilistic classifier:\n\n```julia\nEvoTreesClassifier = @load EvoTreesClassifier\nprob_predictor = EvoTreesClassifier()\n```\n\nWrapping in `TunedModel` to get a deterministic classifier with `threshold` as a new hyperparameter:\n\n```julia\npoint_predictor = BinaryThresholdPredictor(prob_predictor, threshold=0.6)\nXnew, _ = make_moons(3, rng=rng)\nmach = machine(point_predictor, X, y) |> fit!\npredict(mach, X)[1:3] # [0, 0, 0]\n```\n\nEstimating performance:\n\n```julia\nbalanced = BalancedAccuracy(adjusted=true)\ne = evaluate!(mach, resampling=CV(nfolds=6), measures=[balanced, accuracy])\ne.measurement[1] # 0.405 ± 0.089\n```\n\nWrapping in tuning strategy to learn `threshold` that maximizes balanced accuracy:\n\n```julia\nr = range(point_predictor, :threshold, lower=0.1, upper=0.9)\ntuned_point_predictor = TunedModel(\n point_predictor,\n tuning=RandomSearch(rng=rng),\n resampling=CV(nfolds=6),\n range = r,\n measure=balanced,\n n=30,\n)\nmach2 = machine(tuned_point_predictor, X, y) |> fit!\noptimized_point_predictor = report(mach2).best_model\noptimized_point_predictor.threshold # 0.260\npredict(mach2, X)[1:3] # [1, 1, 0]\n```\n\nEstimating the performance of the auto-thresholding model (nested resampling here):\n\n```julia\ne = evaluate!(mach2, resampling=CV(nfolds=6), measure=[balanced, accuracy])\ne.measurement[1] # 0.477 ± 0.110\n```\n""" +":docstring" = """```\nSOSDetector(perplexity = 4.5,\n metric = \"minkowski\",\n eps = 1e-5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "BinaryThresholdPredictor" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "SOSDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" 
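The PyOD-backed entries added in this hunk (MCDDetector, COPODDetector, HBOSDetector, IForestDetector, SOSDetector, ABODDetector, LOFDetector, PCADetector, INNEDetector) carry constructor-only docstrings, so a single usage sketch may help. The following is a minimal sketch consistent with the scitypes these entries declare, using SOSDetector as the representative; it assumes OutlierDetectionPython and its Python backend PyOD are installed, and the two-vector destructuring simply mirrors the declared `:transform_scitype`.

```julia
using MLJ

# Load one PyOD-backed detector; every detector in this hunk shares the
# same interface, so SOSDetector stands in for the rest.
SOS = @load SOSDetector pkg=OutlierDetectionPython

detector = SOS(perplexity = 4.5)   # defaults as listed in the docstring above
X = rand(100, 3)                   # any Continuous table or matrix is accepted

mach = machine(detector, X)        # unsupervised: no target required
fit!(mach)

# Per the declared :transform_scitype, transform returns a tuple of raw
# outlier scores: one vector for the training data, one for the new data.
train_scores, test_scores = transform(mach, X)
```

The same pattern applies to the other detectors in this hunk; only the constructor keyword arguments differ.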
-":implemented_methods" = [] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`MLJModels.BinaryThresholdPredictor`" - -[MLJModels.FillImputer] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" + +[OutlierDetectionPython.ABODDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"String\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" -":output_scitype" = "`ScientificTypesBase.Table`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJModels.FillImputer" -":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`" -":is_pure_julia" = "`true`" -":human_name" = "fill imputer" +":load_path" = "OutlierDetectionPython.ABODDetector" +":hyperparameters" = "`(:n_neighbors, :method)`" +":is_pure_julia" = "`false`" +":human_name" = "abod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=MLJModels\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. 
A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (it's fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\nschema(X)\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\n(filler = 2.0,)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "FillImputer" +":docstring" = """```\nABODDetector(n_neighbors = 5,\n method = \"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "ABODDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" 
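Note that these detectors declare `:predict_scitype` as `Unknown`: the registry interface by itself yields only raw scores, and the binary `:target_scitype` (`OrderedFactor{2}`) is only reached after post-processing. A hedged sketch follows, assuming the `ProbabilisticDetector` wrapper from the companion OutlierDetection.jl package (not part of this diff) in a one-argument form; the wrapper and the "normal"/"outlier" classes are assumptions, not something these registry entries guarantee.

```julia
using MLJ
using OutlierDetection   # assumed companion package providing the wrapper

ABOD = @load ABODDetector pkg=OutlierDetectionPython

# Assumed: ProbabilisticDetector normalizes the raw scores and exposes
# `predict`, returning UnivariateFinite distributions over the two
# classes "normal" and "outlier".
pdetector = ProbabilisticDetector(ABOD(n_neighbors = 5))

X = rand(100, 3)
mach = machine(pdetector, X)
fit!(mach)

yhat = predict(mach, X)          # probabilistic outlier predictions
labels = predict_mode(mach, X)   # hard "normal"/"outlier" labels
```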
":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":FillImputer"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Table`" -":constructor" = "`nothing`" - -[MLJModels.DeterministicConstantRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`()`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`()`" + +[OutlierDetectionPython.LOFDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\", \"Bool\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJModels.DeterministicConstantRegressor" -":hyperparameters" = "`()`" -":is_pure_julia" = "`true`" -":human_name" = "deterministic constant regressor" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.LOFDetector" +":hyperparameters" = "`(:n_neighbors, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs, :novelty)`" +":is_pure_julia" = "`false`" +":human_name" = "lof detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nDeterministicConstantRegressor\n```\n\nA model type for constructing a deterministic constant regressor, based on\n[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ\nmodel interface.\n\nFrom MLJ, the type can be imported using\n```\nDeterministicConstantRegressor = @load DeterministicConstantRegressor pkg=MLJModels\n```\n\nDo `model = DeterministicConstantRegressor()` to construct an instance with default hyper-parameters. 
""" +":docstring" = """```\nLOFDetector(n_neighbors = 5,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1,\n novelty = true)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "DeterministicConstantRegressor" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "LOFDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":predict"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJModels.UnivariateStandardizer] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`()`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -":hyperparameter_ranges" = "`()`" + +[OutlierDetectionPython.PCADetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"String\", \"Real\", \"Union{Integer, String}\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Infinite}}`" -":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJModels.UnivariateStandardizer" -":hyperparameters" = "`()`" -":is_pure_julia" = "`true`" -":human_name" = "single variable discretizer" +":load_path" = "OutlierDetectionPython.PCADetector" 
+":hyperparameters" = "`(:n_components, :n_selected_components, :copy, :whiten, :svd_solver, :tol, :iterated_power, :standardization, :weighted, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "pca detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateStandardizer()\n```\n\nTransformer type for standardizing (whitening) single variable data.\n\nThis model may be deprecated in the future. Consider using [`Standardizer`](@ref), which handles both tabular *and* univariate data.\n""" -":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "UnivariateStandardizer" +":docstring" = """```\nPCADetector(n_components = nothing,\n n_selected_components = nothing,\n copy = true,\n whiten = false,\n svd_solver = \"auto\",\n tol = 0.0\n iterated_power = \"auto\",\n standardization = true,\n weighted = true,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "PCADetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" -":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":constructor" = "`nothing`" - -[MLJModels.UnivariateFillImputer] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Function\", \"Function\", \"Function\")`" -":package_uuid" = "d491faf4-2d78-11e9-2867-c94bc002c0b7" + +[OutlierDetectionPython.INNEDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Union{Nothing, Integer}\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}}`" -":output_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Unsupervised`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJModels.UnivariateFillImputer" -":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`" -":is_pure_julia" = "`true`" -":human_name" = "single variable fill imputer" +":load_path" = "OutlierDetectionPython.INNEDetector" +":hyperparameters" = "`(:n_estimators, :max_samples, :random_state)`" +":is_pure_julia" = "`false`" +":human_name" = "inne detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateFillImputer = @load UnivariateFillImputer pkg=MLJModels\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to impute `missing` values in a vector with a fixed value learned from the non-missing values of the training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element 
CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n""" -":inverse_transform_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" -":package_url" = "https://github.com/JuliaAI/MLJModels.jl" -":package_name" = "MLJModels" -":name" = "UnivariateFillImputer" +":docstring" = """```\nINNEDetector(n_estimators=200,\n max_samples=\"auto\",\n random_state=nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "INNEDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":transform", ":UnivariateFillImputer"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" -":transform_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":constructor" = "`nothing`" - -[MLJGLMInterface.LinearBinaryClassifier] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\", \"GLM.Link01\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" -":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[OutlierDetectionPython.OCSVMDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"String\", \"Integer\", \"Union{Real, String}\", \"Real\", \"Real\", \"Real\", \"Bool\", \"Integer\", \"Bool\", \"Integer\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}}, 
Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJGLMInterface.LinearBinaryClassifier" -":hyperparameters" = "`(:fit_intercept, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" -":is_pure_julia" = "`true`" -":human_name" = "linear binary classifier" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.OCSVMDetector" +":hyperparameters" = "`(:kernel, :degree, :gamma, :coef0, :tol, :nu, :shrinking, :cache_size, :verbose, :max_iter)`" +":is_pure_julia" = "`false`" +":human_name" = "ocsvm detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLinearBinaryClassifier\n```\n\nA model type for constructing a linear binary classifier, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\n```\n\nDo `model = LinearBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearBinaryClassifier(fit_intercept=...)`.\n\n`LinearBinaryClassifier` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a binary target variable, with a user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor(2)` or `<:Multiclass(2)`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `link=GLM.LogitLink`: The function which links the linear prediction function to the probability of a particular outcome or class. This must have type `GLM.Link01`. Options include `GLM.LogitLink()`, `GLM.ProbitLink()`, `CloglogLink()`, `CauchitLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. 
An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features used during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. 
Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport GLM # namespace must be available\n\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\nclf = LinearBinaryClassifier(fit_intercept=false, link=GLM.ProbitLink())\n\nX, y = @load_crabs\n\nmach = machine(clf, X, y) |> fit!\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\nyhat = predict(mach, Xnew) # probabilistic predictions\npdf(yhat, levels(y)) # probability matrix\np_B = pdf.(yhat, \"B\")\nclass_labels = predict_mode(mach, Xnew)\n\nfitted_params(mach).features\nfitted_params(mach).coef\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearCountRegressor`](@ref)\n""" +":docstring" = """```\nOCSVMDetector(kernel = \"rbf\",\n degree = 3,\n gamma = \"auto\",\n coef0 = 0.0,\n tol = 0.001,\n nu = 0.5,\n shrinking = true,\n cache_size = 200,\n verbose = false,\n max_iter = -1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/GLM.jl" -":package_name" = "GLM" -":name" = "LinearBinaryClassifier" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "OCSVMDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJGLMInterface.LinearCountRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\", \"Distributions.Distribution\", \"GLM.Link\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" -":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[OutlierDetectionPython.ECODDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Any\",)`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = 
"`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJGLMInterface.LinearCountRegressor" -":hyperparameters" = "`(:fit_intercept, :distribution, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" -":is_pure_julia" = "`true`" -":human_name" = "linear count regressor" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.ECODDetector" +":hyperparameters" = "`(:n_jobs,)`" +":is_pure_julia" = "`false`" +":human_name" = "ecod detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLinearCountRegressor\n```\n\nA model type for constructing a linear count regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearCountRegressor = @load LinearCountRegressor pkg=GLM\n```\n\nDo `model = LinearCountRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearCountRegressor(fit_intercept=...)`.\n\n`LinearCountRegressor` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a `Count` target variable (non-negative, unbounded integer) with user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Count`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `distribution=Distributions.Poisson()`: The distribution which the residuals/errors of the model should fit.\n * `link=GLM.LogLink()`: The function which links the linear prediction function to the probability of a particular outcome or class. 
This should be one of the following: `GLM.IdentityLink()`, `GLM.InverseLink()`, `GLM.InverseSquareLink()`, `GLM.LogLink()`, `GLM.SqrtLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol)`. This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. 
Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport MLJ.Distributions.Poisson\n\n# Generate some data whose target y looks Poisson when conditioned on\n# X:\nN = 10_000\nw = [1.0, -2.0, 3.0]\nmu(x) = exp(w'x) # mean for a log link function\nXmat = rand(N, 3)\nX = MLJ.table(Xmat)\ny = map(1:N) do i\n x = Xmat[i, :]\n rand(Poisson(mu(x)))\nend;\n\nCountRegressor = @load LinearCountRegressor pkg=GLM\nmodel = CountRegressor(fit_intercept=false)\nmach = machine(model, X, y)\nfit!(mach)\n\nXnew = MLJ.table(rand(3, 3))\nyhat = predict(mach, Xnew)\nyhat_point = predict_mean(mach, Xnew)\n\n# get coefficients approximating `w`:\njulia> fitted_params(mach).coef\n3-element Vector{Float64}:\n 0.9969008753103842\n -2.0255901752504775\n 3.014407534033522\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" +":docstring" = """```\nECODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/GLM.jl" -":package_name" = "GLM" -":name" = "LinearCountRegressor" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "ECODDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Count}}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Count}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJGLMInterface.LinearRegressor] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Symbol}\", \"Union{Nothing, AbstractVector{Symbol}}\")`" -":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" + +[OutlierDetectionPython.SODDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Real\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, 
AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJGLMInterface.LinearRegressor" -":hyperparameters" = "`(:fit_intercept, :dropcollinear, :offsetcol, :report_keys)`" -":is_pure_julia" = "`true`" -":human_name" = "linear regressor" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.SODDetector" +":hyperparameters" = "`(:n_neighbors, :ref_set, :alpha)`" +":is_pure_julia" = "`false`" +":human_name" = "sod detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=GLM\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(fit_intercept=...)`.\n\n`LinearRegressor` assumes the target is a continuous variable whose conditional distribution is normal with constant variance, and whose expected value is a linear combination of the features (identity link function). Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n * `w`: is a vector of `Real` per-observation weights\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `dropcollinear=false`: Whether to drop features in the training data to ensure linear independence. If true, only the first of each set of linearly-dependent features is used. The coefficient for redundant linearly dependent features is `0.0` and all associated statistics are set to `NaN`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `report_keys`: `Vector` of keys for the report. 
Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nWhen all keys are enabled in `report_keys`, the following fields are available in `report(mach)`:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nLinearRegressor = @load LinearRegressor pkg=GLM\nglm = LinearRegressor()\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(glm, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\nyhat_point = predict_mean(mach, Xnew) # new predictions\n\nfitted_params(mach).features\nfitted_params(mach).coef # x1, x2, intercept\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearCountRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" +":docstring" = """```\nSODDetector(n_neighbors = 5,\n ref_set = 10,\n alpha = 0.8)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/GLM.jl" -":package_name" = "GLM" -":name" = "LinearRegressor" -":target_in_fit" = "`true`" +":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "SODDetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" -":supports_weights" = "`true`" +":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = 
"`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[OneRule.OneRuleClassifier] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`()`" -":package_uuid" = "90484964-6d6a-4979-af09-8657dbed84ff" -":hyperparameter_ranges" = "`()`" + +[OutlierDetectionPython.LODADetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Integer\")`" +":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "OneRule.OneRuleClassifier" -":hyperparameters" = "`()`" -":is_pure_julia" = "`true`" -":human_name" = "one rule classifier" -":is_supervised" = "`true`" +":prediction_type" = ":unknown" +":load_path" = "OutlierDetectionPython.LODADetector" +":hyperparameters" = "`(:n_bins, :n_random_cuts)`" +":is_pure_julia" = "`false`" +":human_name" = "loda detector" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nOneRuleClassifier\n```\n\nA model type for constructing a one rule classifier, based on [OneRule.jl](https://github.com/roland-KA/OneRule.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneRuleClassifier = @load OneRuleClassifier pkg=OneRule\n```\n\nDo `model = OneRuleClassifier()` to construct an instance with default hyper-parameters. \n\n`OneRuleClassifier` implements the OneRule method for classification by Robert Holte (\"Very simple classification rules perform well on most commonly used datasets\" in: Machine Learning 11.1 (1993), pp. 63-90). \n\n```\nFor more information see:\n\n- Witten, Ian H., Eibe Frank, and Mark A. Hall. \n Data Mining Practical Machine Learning Tools and Techniques Third Edition. \n Morgan Kaufmann, 2017, pp. 
93-96.\n- [Machine Learning - (One|Simple) Rule](https://datacadamia.com/data_mining/one_rule)\n- [OneRClassifier - One Rule for Classification](http://rasbt.github.io/mlxtend/user_guide/classifier/OneRClassifier/)\n```\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X, y) where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Multiclass`, `OrderedFactor`, or `<:Finite`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\nThis classifier has no hyper-parameters.\n\n# Operations\n\n * `predict(mach, Xnew)`: return (deterministic) predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `tree`: the tree (a `OneTree`) returned by the core OneTree.jl algorithm\n * `all_classes`: all classes (i.e. levels) of the target (used also internally to transfer `levels`-information to `predict`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `tree`: The `OneTree` created based on the training data\n * `nrules`: The number of rules `tree` contains\n * `error_rate`: fraction of wrongly classified instances\n * `error_count`: number of wrongly classified instances\n * `classes_seen`: list of target classes actually observed in training\n * `features`: the names of the features encountered in training\n\n# Examples\n\n```\nusing MLJ\n\nORClassifier = @load OneRuleClassifier pkg=OneRule\n\norc = ORClassifier()\n\noutlook = [\"sunny\", \"sunny\", \"overcast\", \"rainy\", \"rainy\", \"rainy\", \"overcast\", \"sunny\", \"sunny\", \"rainy\", \"sunny\", \"overcast\", \"overcast\", \"rainy\"]\ntemperature = [\"hot\", \"hot\", \"hot\", \"mild\", \"cool\", \"cool\", \"cool\", \"mild\", \"cool\", \"mild\", \"mild\", \"mild\", \"hot\", \"mild\"]\nhumidity = [\"high\", \"high\", \"high\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"normal\", \"normal\", \"high\", \"normal\", \"high\"]\nwindy = [\"false\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"false\", \"false\", \"true\", \"true\", \"false\", \"true\"]\n\nweather_data = (outlook = outlook, temperature = temperature, humidity = humidity, windy = windy)\nplay_data = [\"no\", \"no\", \"yes\", \"yes\", \"yes\", \"no\", \"yes\", \"no\", \"yes\", \"yes\", \"yes\", \"yes\", \"yes\", \"no\"]\n\nweather = coerce(weather_data, Textual => Multiclass)\nplay = coerce(play_data, Multiclass)\n\nmach = machine(orc, weather, play)\nfit!(mach)\n\nyhat = MLJ.predict(mach, weather) # in a real context 'new' `weather` data would be used\none_tree = fitted_params(mach).tree\nreport(mach).error_rate\n```\n\nSee also [OneRule.jl](https://github.com/roland-KA/OneRule.jl).\n""" +":docstring" = """```\nLODADetector(n_bins = 10,\n n_random_cuts = 100)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = 
"https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" +":package_name" = "OutlierDetectionPython" +":name" = "LODADetector" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Finite}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[OutlierDetectionPython.MCDDetector] +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\")`" + +[OutlierDetectionPython.KDEDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"String\", \"Integer\", \"String\", \"Any\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7819,17 +7375,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.MCDDetector" -":hyperparameters" = "`(:store_precision, :assume_centered, :support_fraction, :random_state)`" +":load_path" = "OutlierDetectionPython.KDEDetector" +":hyperparameters" = "`(:bandwidth, :algorithm, :leaf_size, :metric, :metric_params)`" ":is_pure_julia" = "`false`" -":human_name" = "mcd detector" +":human_name" = "kde detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nMCDDetector(store_precision = true,\n assume_centered = false,\n support_fraction = nothing,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.mcd)\n""" +":docstring" = """```\nKDEDetector(bandwidth=1.0,\n algorithm=\"auto\",\n leaf_size=30,\n metric=\"minkowski\",\n metric_params=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde)\n""" 
":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "MCDDetector" +":name" = "KDEDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7842,11 +7398,11 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" - -[OutlierDetectionPython.COPODDetector] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\",)`" + +[OutlierDetectionPython.CDDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"PythonCall.Py\",)`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" ":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" @@ -7856,17 +7412,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.COPODDetector" -":hyperparameters" = "`(:n_jobs,)`" +":load_path" = "OutlierDetectionPython.CDDetector" +":hyperparameters" = "`(:model,)`" ":is_pure_julia" = "`false`" -":human_name" = "copod detector" +":human_name" = "cd detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nCOPODDetector(n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.copod)\n""" +":docstring" = """```\nCDDetector(whitening = true,\n rule_of_thumb = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "COPODDetector" +":name" = "CDDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7879,13 +7435,13 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" - -[OutlierDetectionPython.HBOSDetector] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\")`" + +[OutlierDetectionPython.KNNDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7893,17 +7449,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.HBOSDetector" -":hyperparameters" = "`(:n_bins, :alpha, :tol)`" +":load_path" = "OutlierDetectionPython.KNNDetector" +":hyperparameters" = "`(:n_neighbors, :method, :radius, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs)`" ":is_pure_julia" = "`false`" -":human_name" = "hbos detector" +":human_name" = "knn detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nHBOSDetector(n_bins = 10,\n alpha = 0.1,\n tol = 0.5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.hbos)\n""" +":docstring" = """```\nKNNDetector(n_neighbors = 5,\n method = \"largest\",\n radius = 1.0,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "HBOSDetector" +":name" = "KNNDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7916,13 +7472,13 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" - -[OutlierDetectionPython.IForestDetector] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\", \"Integer\")`" + +[OutlierDetectionPython.GMMDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"Real\", \"Integer\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Bool\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7930,17 +7486,17 @@ ":abstract_type" = 
"`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.IForestDetector" -":hyperparameters" = "`(:n_estimators, :max_samples, :max_features, :bootstrap, :random_state, :verbose, :n_jobs)`" +":load_path" = "OutlierDetectionPython.GMMDetector" +":hyperparameters" = "`(:n_components, :covariance_type, :tol, :reg_covar, :max_iter, :n_init, :init_params, :random_state, :warm_start)`" ":is_pure_julia" = "`false`" -":human_name" = "i forest detector" +":human_name" = "gmm detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nIForestDetector(n_estimators = 100,\n max_samples = \"auto\",\n max_features = 1.0\n bootstrap = false,\n random_state = nothing,\n verbose = 0,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.iforest)\n""" +":docstring" = """```\nGMMDetector(n_components=1,\n covariance_type=\"full\",\n tol=0.001,\n reg_covar=1e-06,\n max_iter=100,\n n_init=1,\n init_params=\"kmeans\",\n weights_init=None,\n means_init=None,\n precisions_init=None,\n random_state=None,\n warm_start=False)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "IForestDetector" +":name" = "GMMDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7953,13 +7509,13 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" - -[OutlierDetectionPython.SOSDetector] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Real\", \"String\", \"Real\")`" + +[OutlierDetectionPython.COFDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"String\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -7967,17 +7523,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.SOSDetector" -":hyperparameters" = "`(:perplexity, :metric, :eps)`" +":load_path" = "OutlierDetectionPython.COFDetector" +":hyperparameters" = "`(:n_neighbors, :method)`" ":is_pure_julia" = "`false`" -":human_name" = "sos detector" +":human_name" = "cof detector" ":is_supervised" 
= "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nSOSDetector(perplexity = 4.5,\n metric = \"minkowski\",\n eps = 1e-5)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sos)\n""" +":docstring" = """```\nCOFDetector(n_neighbors = 5,\n method=\"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "SOSDetector" +":name" = "COFDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -7990,13 +7546,13 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" - -[OutlierDetectionPython.ABODDetector] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"String\")`" + +[OutlierDetectionPython.CBLOFDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -8004,17 +7560,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.ABODDetector" -":hyperparameters" = "`(:n_neighbors, :method)`" +":load_path" = "OutlierDetectionPython.CBLOFDetector" +":hyperparameters" = "`(:n_clusters, :alpha, :beta, :use_weights, :random_state, :n_jobs)`" ":is_pure_julia" = "`false`" -":human_name" = "abod detector" +":human_name" = "cblof detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nABODDetector(n_neighbors = 5,\n method = \"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.abod)\n""" +":docstring" = """```\nCBLOFDetector(n_clusters = 8,\n alpha = 0.9,\n beta = 5,\n use_weights = false,\n random_state = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = 
"https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "ABODDetector" +":name" = "CBLOFDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -8027,13 +7583,13 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" - -[OutlierDetectionPython.LOFDetector] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\", \"Bool\")`" + +[OutlierDetectionPython.LOCIDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Real\", \"Real\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -8041,17 +7597,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.LOFDetector" -":hyperparameters" = "`(:n_neighbors, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs, :novelty)`" +":load_path" = "OutlierDetectionPython.LOCIDetector" +":hyperparameters" = "`(:alpha, :k)`" ":is_pure_julia" = "`false`" -":human_name" = "lof detector" +":human_name" = "loci detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLOFDetector(n_neighbors = 5,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1,\n novelty = true)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lof)\n""" +":docstring" = """```\nLOCIDetector(alpha = 0.5,\n k = 3)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "LOFDetector" +":name" = "LOCIDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -8064,13 +7620,13 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" 
-":constructor" = "`nothing`" - -[OutlierDetectionPython.PCADetector] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"String\", \"Real\", \"Union{Integer, String}\", \"Bool\", \"Bool\", \"Union{Nothing, Integer}\")`" + +[OutlierDetectionPython.LMDDDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Integer\", \"String\", \"Union{Nothing, Integer}\")`" ":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -8078,17 +7634,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.PCADetector" -":hyperparameters" = "`(:n_components, :n_selected_components, :copy, :whiten, :svd_solver, :tol, :iterated_power, :standardization, :weighted, :random_state)`" +":load_path" = "OutlierDetectionPython.LMDDDetector" +":hyperparameters" = "`(:n_iter, :dis_measure, :random_state)`" ":is_pure_julia" = "`false`" -":human_name" = "pca detector" +":human_name" = "lmdd detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nPCADetector(n_components = nothing,\n n_selected_components = nothing,\n copy = true,\n whiten = false,\n svd_solver = \"auto\",\n tol = 0.0\n iterated_power = \"auto\",\n standardization = true,\n weighted = true,\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.pca)\n""" +":docstring" = """```\nLMDDDetector(n_iter = 50,\n dis_measure = \"aad\",\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "PCADetector" +":name" = "LMDDDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -8101,13 +7657,13 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" - -[OutlierDetectionPython.INNEDetector] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"Union{Real, String}\", \"Union{Nothing, Integer}\")`" + +[OutlierDetectionPython.RODDetector] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Bool\",)`" 
":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" ":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" ":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" @@ -8115,17 +7671,17 @@ ":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.INNEDetector" -":hyperparameters" = "`(:n_estimators, :max_samples, :random_state)`" +":load_path" = "OutlierDetectionPython.RODDetector" +":hyperparameters" = "`(:parallel_execution,)`" ":is_pure_julia" = "`false`" -":human_name" = "inne detector" +":human_name" = "rod detector" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nINNEDetector(n_estimators=200,\n max_samples=\"auto\",\n random_state=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.inne)\n""" +":docstring" = """```\nRODDetector(parallel_execution = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" ":package_name" = "OutlierDetectionPython" -":name" = "INNEDetector" +":name" = "RODDetector" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" @@ -8138,1048 +7694,1159 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" -":constructor" = "`nothing`" - -[OutlierDetectionPython.OCSVMDetector] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"String\", \"Integer\", \"Union{Real, String}\", \"Real\", \"Real\", \"Real\", \"Bool\", \"Integer\", \"Bool\", \"Integer\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[SelfOrganizingMaps.SelfOrganizingMap] +":constructor" = "`nothing`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Symbol\", \"Symbol\", \"Symbol\", \"Symbol\", \"Distances.PreMetric\", \"Int64\")`" +":package_uuid" = "ba4b7379-301a-4be0-bee6-171e4e152787" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, 
AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.OCSVMDetector" -":hyperparameters" = "`(:kernel, :degree, :gamma, :coef0, :tol, :nu, :shrinking, :cache_size, :verbose, :max_iter)`" -":is_pure_julia" = "`false`" -":human_name" = "ocsvm detector" +":load_path" = "SelfOrganizingMaps.SelfOrganizingMap" +":hyperparameters" = "`(:k, :η, :σ², :grid_type, :η_decay, :σ_decay, :neighbor_function, :matching_distance, :Nepochs)`" +":is_pure_julia" = "`true`" +":human_name" = "self organizing map" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nOCSVMDetector(kernel = \"rbf\",\n degree = 3,\n gamma = \"auto\",\n coef0 = 0.0,\n tol = 0.001,\n nu = 0.5,\n shrinking = true,\n cache_size = 200,\n verbose = false,\n max_iter = -1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ocsvm)\n""" +":docstring" = """```\nSelfOrganizingMap\n```\n\nA model type for constructing a self organizing map, based on [SelfOrganizingMaps.jl](https://github.com/john-waczak/SelfOrganizingMaps.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSelfOrganizingMap = @load SelfOrganizingMap pkg=SelfOrganizingMaps\n```\n\nDo `model = SelfOrganizingMap()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SelfOrganizingMap(k=...)`.\n\nSelfOrganizingMaps implements [Kohonen's Self Organizing Map](https://ieeexplore.ieee.org/abstract/document/58325?casa_token=pGue0TD38nAAAAAA:kWFkvMJQKgYOTJjJx-_bRx8n_tnWEpau2QeoJ1gJt0IsywAuvkXYc0o5ezdc2mXfCzoEZUQXSQ), Proceedings of the IEEE; Kohonen, T.; (1990): \"The self-organizing map\"\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X)`, where\n\n * `X`: an `AbstractMatrix` or `Table` of input features whose columns are of scitype `Continuous`.\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=10`: Number of nodes along one side of the SOM grid. There are `k²` total nodes.\n * `η=0.5`: Learning rate. Scales the adjustments made to the winning node and its neighbors during each round of training.\n * `σ²=0.05`: The (squared) neighbor radius. Used to determine the scale for neighbor node adjustments.\n * `grid_type=:rectangular` Node grid geometry. One of `(:rectangular, :hexagonal, :spherical)`.\n * `η_decay=:exponential` Learning rate schedule function. One of `(:exponential, :asymptotic)`\n * `σ_decay=:exponential` Neighbor radius schedule function.
One of `(:exponential, :asymptotic, :none)`\n * `neighbor_function=:gaussian` Kernel function used to make adjustments to neighbor weights. Scale is set by `σ²`. One of `(:gaussian, :mexican_hat)`.\n * `matching_distance=euclidean` Distance function from `Distances.jl` used to determine the winning node.\n * `Nepochs=1` Number of times to repeat training on the shuffled dataset.\n\n# Operations\n\n * `transform(mach, Xnew)`: returns the coordinates of the winning SOM node for each instance of `Xnew`. For SOMs of `grid_type` `:rectangular` and `:hexagonal`, these are Cartesian coordinates. For `grid_type` `:spherical`, these are the latitude and longitude in radians.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coords`: The coordinates of each of the SOM nodes (points in the domain of the map) with shape (k², 2)\n * `weights`: Array of weight vectors for the SOM nodes (corresponding points in the map's range) of shape (k², input dimension)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes`: the index of the winning node for each instance of the training data X interpreted as a class label\n\n# Examples\n\n```\nusing MLJ\nsom = @load SelfOrganizingMap pkg=SelfOrganizingMaps\nmodel = som()\nX, y = make_regression(50, 3) # synthetic data\nmach = machine(model, X) |> fit!\nX̃ = transform(mach, X)\n\nrpt = report(mach)\nclasses = rpt.classes\n```\n""" +":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/john-waczak/SelfOrganizingMaps.jl" +":package_name" = "SelfOrganizingMaps" +":name" = "SelfOrganizingMap" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" +":is_wrapper" = "`false`" + +[InteractiveUtils] + +[MLJMultivariateStatsInterface.LDA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Distances.SemiMetric\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.LDA" +":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :dist)`" +":is_pure_julia" = "`true`" +":human_name" = "linear discriminant analysis model" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nLDA\n```\n\nA model type for constructing a linear discriminant analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLDA = @load LDA pkg=MultivariateStats\n```\n\nDo `model = LDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LDA(method=...)`.\n\n[Multiclass linear discriminant analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection in a space of features to a lower dimensional space, in a way that attempts to preserve as much as possible the degree to which the classes of a discrete target variable can be discriminated. This can be used either for dimension reduction of the features (see `transform` below) or for probabilistic classification of the target (see `predict` below).\n\nIn the case of prediction, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: The solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. 
This can be useful if using the standard covariance estimator.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nLDA = @load LDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = LDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "LDA" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[OutlierDetectionPython.ECODDetector] +[MLJMultivariateStatsInterface.MultitargetLinearRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Any\",)`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_types" = "`(\"Bool\",)`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":package_license" = "MIT" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.MultitargetLinearRegressor" +":hyperparameters" = "`(:bias,)`" +":is_pure_julia" = "`true`" +":human_name" = "multitarget linear regressor" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nMultitargetLinearRegressor\n```\n\nA model type for constructing a multitarget linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetLinearRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetLinearRegressor(bias=...)`.\n\n`MultitargetLinearRegressor` assumes the target variable is vector-valued with continuous components. It trains a linear prediction function using the least squares algorithm. Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 9; n_targets = 2) # a table and a table (synthetic data)\n\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 9)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "MultitargetLinearRegressor" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":deep_properties" = "`()`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" + +[MLJMultivariateStatsInterface.BayesianSubspaceLDA] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":package_license" = "MIT" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.BayesianSubspaceLDA" +":hyperparameters" = 
"`(:normalize, :outdim, :priors)`" +":is_pure_julia" = "`true`" +":human_name" = "Bayesian subspace LDA model" +":is_supervised" = "`true`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nBayesianSubspaceLDA\n```\n\nA model type for constructing a Bayesian subspace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianSubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianSubspaceLDA(normalize=...)`.\n\nThe Bayesian multiclass subspace linear discriminant analysis algorithm learns a projection matrix as described in [`SubspaceLDA`](@ref). The posterior class probability distribution is derived as in [`BayesianLDA`](@ref).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n\n`outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. 
A `UnivariateFinite` object with levels consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The overall mean of the training data.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```\nusing MLJ\n\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianSubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "BayesianSubspaceLDA" +":target_in_fit" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":deep_properties" = "`()`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":supports_training_losses" = "`false`" +":supports_weights" = "`false`" +":reports_feature_importances" = "`false`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":constructor" = "`nothing`" + +[MLJMultivariateStatsInterface.FactorAnalysis] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Int64\", \"Real\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":reporting_operations" = "`()`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":tags" = [] +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.ECODDetector" -":hyperparameters" = "`(:n_jobs,)`" -":is_pure_julia" = "`false`" -":human_name" = "ecod detector" +":load_path" = "MLJMultivariateStatsInterface.FactorAnalysis" +":hyperparameters" = "`(:method, :maxoutdim, :maxiter, :tol, :eta, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "factor analysis model" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nECODDetector(n_jobs = 
1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.ecod)\n""" +":docstring" = """```\nFactorAnalysis\n```\n\nA model type for constructing a factor analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n```\n\nDo `model = FactorAnalysis()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FactorAnalysis(method=...)`.\n\nFactor analysis is a linear-Gaussian latent variable model that is closely related to probabilistic PCA. In contrast to the probabilistic PCA model, the covariance of conditional distribution of the observed variable given the latent variable is diagonal rather than isotropic.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:cm`: Method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `maxiter::Int=1000`: Maximum number of iterations.\n * `tol::Real=1e-6`: Convergence tolerance.\n * `eta::Real=tol`: Variance lower bound.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively. 
Each column of the projection matrix corresponds to a factor.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data (number of factors).\n * `variance`: The variance of the factors.\n * `covariance_matrix`: The estimated covariance matrix.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `loadings`: The factor loadings. A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = FactorAnalysis(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`PPCA`](@ref), [`PCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "FactorAnalysis" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[OutlierDetectionPython.SODDetector] +[MLJMultivariateStatsInterface.LinearRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"Integer\", \"Real\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Bool\",)`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" 
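The `inverse_transform` operation documented for `FactorAnalysis` above reconstructs an approximation to the original table from its factor scores. A minimal sketch, not from the registry, assuming MLJ and the MultivariateStats interface package are installed:

```
using MLJ

FactorAnalysis = @load FactorAnalysis pkg=MultivariateStats

X, _ = @load_iris  # a table and a vector; only the table is used here

model = FactorAnalysis(maxoutdim = 2)
mach = machine(model, X) |> fit!

Xsmall = transform(mach, X)             # two-factor scores, one column per factor
Xrec = inverse_transform(mach, Xsmall)  # approximate reconstruction of X, with
                                        # the same number of columns as X
```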
+":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.SODDetector" -":hyperparameters" = "`(:n_neighbors, :ref_set, :alpha)`" -":is_pure_julia" = "`false`" -":human_name" = "sod detector" -":is_supervised" = "`false`" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.LinearRegressor" +":hyperparameters" = "`(:bias,)`" +":is_pure_julia" = "`true`" +":human_name" = "linear regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nSODDetector(n_neighbors = 5,\n ref_set = 10,\n alpha = 0.8)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.sod)\n""" +":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(bias=...)`.\n\n`LinearRegressor` assumes the target is a `Continuous` variable and trains a linear prediction function using the least squares algorithm. Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 2) # a table and a vector (synthetic data)\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "SODDetector" -":target_in_fit" = "`false`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "LinearRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", 
":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[OutlierDetectionPython.LODADetector] +[MLJMultivariateStatsInterface.ICA] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"Integer\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Symbol\", \"Bool\", \"Int64\", \"Real\", \"Union{Nothing, Matrix{<:Real}}\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.LODADetector" -":hyperparameters" = "`(:n_bins, :n_random_cuts)`" -":is_pure_julia" = "`false`" -":human_name" = "loda detector" +":load_path" = "MLJMultivariateStatsInterface.ICA" +":hyperparameters" = "`(:outdim, :alg, :fun, :do_whiten, :maxiter, :tol, :winit, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "independent component analysis model" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLODADetector(n_bins = 10,\n n_random_cuts = 100)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loda)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "LODADetector" +":docstring" = """```\nICA\n```\n\nA model type for constructing a independent component analysis model, based on 
[MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nICA = @load ICA pkg=MultivariateStats\n```\n\nDo `model = ICA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ICA(outdim=...)`.\n\nIndependent component analysis is a computational technique for separating a multivariate signal into additive subcomponents, with the assumption that the subcomponents are non-Gaussian and independent from each other.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `outdim::Int=0`: The number of independent components to recover, set automatically if `0`.\n * `alg::Symbol=:fastica`: The algorithm to use (only `:fastica` is supported at the moment).\n * `fun::Symbol=:tanh`: The approximate neg-entropy function, one of `:tanh`, `:gaus`.\n * `do_whiten::Bool=true`: Whether or not to perform pre-whitening.\n * `maxiter::Int=100`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance for change in the unmixing matrix W.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: The mean to use: if `nothing` (default), centering is computed and applied; if zero, no centering is applied; otherwise a vector of means can be passed.\n * `winit::Union{Nothing,Matrix{<:Real}}=nothing`: Initial guess for the unmixing matrix `W`: either an empty matrix (for random initialization of `W`), or a matrix of size `m × k`. 
Here `m` is the number of components (columns) of the input.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return the component-separated version of input `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: The estimated component matrix.\n * `mean`: The estimated mean vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n\n# Examples\n\n```\nusing MLJ\n\nICA = @load ICA pkg=MultivariateStats\n\ntimes = range(0, 8, length=2000)\n\nsine_wave = sin.(2*times)\nsquare_wave = sign.(sin.(3*times))\nsawtooth_wave = map(t -> mod(2t, 2) - 1, times)\nsignals = hcat(sine_wave, square_wave, sawtooth_wave)\nnoisy_signals = signals + 0.2*randn(size(signals))\n\nmixing_matrix = [ 1 1 1; 0.5 2 1; 1.5 1 2]\nX = MLJ.table(noisy_signals*mixing_matrix)\n\nmodel = ICA(outdim = 3, tol=0.1)\nmach = machine(model, X) |> fit!\n\nX_unmixed = transform(mach, X)\n\nusing Plots\n\nplot(X.x1)\nplot(X.x2)\nplot(X.x3)\n\nplot(X_unmixed.x1)\nplot(X_unmixed.x2)\nplot(X_unmixed.x3)\n\n```\n\nSee also [`PCA`](@ref), [`KernelPCA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "ICA" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[OutlierDetectionPython.KDEDetector] +[MLJMultivariateStatsInterface.PPCA] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Real\", \"String\", \"Integer\", \"String\", \"Any\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = 
Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.KDEDetector" -":hyperparameters" = "`(:bandwidth, :algorithm, :leaf_size, :metric, :metric_params)`" -":is_pure_julia" = "`false`" -":human_name" = "kde detector" +":load_path" = "MLJMultivariateStatsInterface.PPCA" +":hyperparameters" = "`(:maxoutdim, :method, :maxiter, :tol, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "probabilistic PCA model" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nKDEDetector(bandwidth=1.0,\n algorithm=\"auto\",\n leaf_size=30,\n metric=\"minkowski\",\n metric_params=None)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.kde)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "KDEDetector" +":docstring" = """```\nPPCA\n```\n\nA model type for constructing a probabilistic PCA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPPCA = @load PPCA pkg=MultivariateStats\n```\n\nDo `model = PPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PPCA(maxoutdim=...)`.\n\nProbabilistic principal component analysis is a dimension-reduction algorithm which represents a constrained form of the Gaussian distribution in which the number of free parameters can be restricted while still allowing the model to capture the dominant correlations in a data set. It is expressed as the maximum likelihood solution of a probabilistic latent variable model. For details, see C. M. Bishop (2006): Pattern Recognition and Machine Learning.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. 
Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `method::Symbol=:ml`: The method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxiter::Int=1000`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively. Each column of the projection matrix corresponds to a principal component.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `tvar`: The variance of the components.\n * `loadings`: The model's loadings matrix. 
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nPPCA = @load PPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PPCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "PPCA" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[OutlierDetectionPython.CDDetector] +[MLJMultivariateStatsInterface.RidgeRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"PythonCall.Py\",)`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing,)`" +":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.CDDetector" -":hyperparameters" = "`(:model,)`" -":is_pure_julia" = "`false`" -":human_name" = "cd detector" -":is_supervised" = "`false`" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.RidgeRegressor" +":hyperparameters" = "`(:lambda, :bias)`" +":is_pure_julia" = "`true`" +":human_name" = "ridge regressor" 
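The `lambda` and `bias` hyper-parameters registered above correspond to penalized least squares. The following is a minimal sketch of that computation in plain Julia, under the assumption that the penalized normal equations are solved directly; the name `ridge_coefficients` is illustrative and is not an API of MultivariateStats.jl or of this registry:

```julia
using LinearAlgebra

# Minimal sketch of ridge regression via the penalized normal equations.
# `ridge_coefficients` is a hypothetical helper, not a package API.
function ridge_coefficients(X::AbstractMatrix, y::AbstractVector;
                            lambda::Real=1.0, bias::Bool=true)
    A = bias ? hcat(X, ones(size(X, 1))) : X  # optional intercept column
    d = fill(float(lambda), size(A, 2))       # penalize each slope ...
    bias && (d[end] = 0.0)                    # ... but not the intercept
    (A'A + Diagonal(d)) \ (A'y)               # regularized least squares
end

X = randn(100, 3)
y = X * [1.0, -2.0, 0.5] .+ 0.1 .* randn(100)
ridge_coefficients(X, y; lambda=10.0)  # slopes shrink toward 0 as lambda grows
```

With `lambda=0` this reduces to ordinary least squares, consistent with the `LinearRegressor` entry earlier in this file.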
+":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nCDDetector(whitening = true,\n rule_of_thumb = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cd)\n""" +":docstring" = """```\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RidgeRegressor(lambda=...)`.\n\n`RidgeRegressor` adds a quadratic penalty term to least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\npipe = Standardizer() |> RidgeRegressor(lambda=10)\n\nX, y = @load_boston\n\nmach = machine(pipe, X, y) |> fit!\nyhat = predict(mach, X)\ntraining_error = l1(yhat, y) |> mean\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "CDDetector" -":target_in_fit" = "`false`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "RidgeRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" 
+":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[OutlierDetectionPython.KNNDetector] +[MLJMultivariateStatsInterface.KernelPCA] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"String\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Any\", \"Integer\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Function}\", \"Symbol\", \"Bool\", \"Real\", \"Real\", \"Int64\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.KNNDetector" -":hyperparameters" = "`(:n_neighbors, :method, :radius, :algorithm, :leaf_size, :metric, :p, :metric_params, :n_jobs)`" -":is_pure_julia" = "`false`" -":human_name" = "knn detector" +":load_path" = "MLJMultivariateStatsInterface.KernelPCA" +":hyperparameters" = "`(:maxoutdim, :kernel, :solver, :inverse, :beta, :tol, :maxiter)`" +":is_pure_julia" = "`true`" +":human_name" = "kernel prinicipal component analysis model" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nKNNDetector(n_neighbors = 5,\n method = \"largest\",\n radius = 1.0,\n algorithm = \"auto\",\n leaf_size = 30,\n metric = \"minkowski\",\n p = 2,\n metric_params = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.knn)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "KNNDetector" +":docstring" = """```\nKernelPCA\n```\n\nA model type for constructing a kernel prinicipal component analysis model, based on 
[MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n```\n\nDo `model = KernelPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KernelPCA(maxoutdim=...)`.\n\nIn kernel PCA the linear operations of ordinary principal component analysis are performed in a [reproducing kernel Hilbert space](https://en.wikipedia.org/wiki/Reproducing_kernel_Hilbert_space).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `kernel::Function=(x,y)->x'y`: The kernel function; it takes two vector arguments `x` and `y` and returns a scalar value. Defaults to the dot product of `x` and `y`.\n * `solver::Symbol=:eig`: solver to use for the eigenvalues, one of `:eig` (default, uses `LinearAlgebra.eigen`), `:eigs` (uses `Arpack.eigs`).\n * `inverse::Bool=true`: perform calculations needed for inverse transform\n * `beta::Real=1.0`: strength of the ridge regression that learns the inverse transform when inverse is true.\n * `tol::Real=0.0`: Convergence tolerance for eigenvalue solver.\n * `maxiter::Int=300`: maximum number of iterations for eigenvalue solver.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having the same number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `principalvars`: The variance of the principal components.\n\n# Examples\n\n```\nusing MLJ\nusing LinearAlgebra\n\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nfunction rbf_kernel(length_scale)\n return (x,y) -> exp(-norm(x-y)^2 / (2 * length_scale^2))\nend\n\nmodel = KernelPCA(maxoutdim=2, kernel=rbf_kernel(1))\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`PCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "KernelPCA" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[OutlierDetectionPython.GMMDetector] +[MLJMultivariateStatsInterface.MultitargetRidgeRegressor] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"String\", \"Real\", \"Real\", \"Integer\", \"Integer\", \"String\", \"Union{Nothing, Integer}\", \"Bool\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" 
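The `kernel` hyper-parameter of the `KernelPCA` entry above is constrained only to be a function of two vectors returning a scalar, but kernel PCA is well posed only when the kernel induces a symmetric, positive semi-definite Gram matrix. Below is a quick empirical check of both properties on sample data; `kernel_sanity_check` is a hypothetical helper, not an MLJ or MultivariateStats.jl API:

```julia
using LinearAlgebra

# Hypothetical helper: verify that `kernel(x, y)` yields a symmetric,
# positive semi-definite Gram matrix on a sample of row-observations.
function kernel_sanity_check(kernel, X::AbstractMatrix; atol=1e-8)
    n = size(X, 1)
    K = [kernel(X[i, :], X[j, :]) for i in 1:n, j in 1:n]  # Gram matrix
    symmetric = maximum(abs.(K - K')) < atol
    psd = minimum(eigvals(Symmetric(K))) > -atol
    (; symmetric, psd)
end

gaussian(ℓ) = (x, y) -> exp(-sum(abs2, x - y) / (2ℓ^2))
kernel_sanity_check(gaussian(1.0), randn(20, 3))  # (symmetric = true, psd = true)
```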
-":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.GMMDetector" -":hyperparameters" = "`(:n_components, :covariance_type, :tol, :reg_covar, :max_iter, :n_init, :init_params, :random_state, :warm_start)`" -":is_pure_julia" = "`false`" -":human_name" = "gmm detector" -":is_supervised" = "`false`" +":prediction_type" = ":deterministic" +":load_path" = "MLJMultivariateStatsInterface.MultitargetRidgeRegressor" +":hyperparameters" = "`(:lambda, :bias)`" +":is_pure_julia" = "`true`" +":human_name" = "multitarget ridge regressor" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nGMMDetector(n_components=1,\n covariance_type=\"full\",\n tol=0.001,\n reg_covar=1e-06,\n max_iter=100,\n n_init=1,\n init_params=\"kmeans\",\n weights_init=None,\n means_init=None,\n precisions_init=None,\n random_state=None,\n warm_start=False)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.gmm)\n""" +":docstring" = """```\nMultitargetRidgeRegressor\n```\n\nA model type for constructing a multitarget ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetRidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetRidgeRegressor(lambda=...)`.\n\nMulti-target ridge regression adds a quadratic penalty term to multi-target least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. In this case, the output represents a response vector. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. 
If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n\nX, y = make_regression(100, 6; n_targets = 2) # a table and a table (synthetic data)\n\nridge_regressor = RidgeRegressor(lambda=1.5)\nmach = machine(ridge_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 6)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "GMMDetector" -":target_in_fit" = "`false`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "MultitargetRidgeRegressor" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[OutlierDetectionPython.COFDetector] +[MLJMultivariateStatsInterface.SubspaceLDA] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"String\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Distances.SemiMetric\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, 
AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.COFDetector" -":hyperparameters" = "`(:n_neighbors, :method)`" -":is_pure_julia" = "`false`" -":human_name" = "cof detector" -":is_supervised" = "`false`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.SubspaceLDA" +":hyperparameters" = "`(:normalize, :outdim, :dist)`" +":is_pure_julia" = "`true`" +":human_name" = "subspace LDA model" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nCOFDetector(n_neighbors = 5,\n method=\"fast\")\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cof)\n""" +":docstring" = """```\nSubspaceLDA\n```\n\nA model type for constructing a subspace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = SubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SubspaceLDA(normalize=...)`.\n\nMulticlass subspace linear discriminant analysis (LDA) is a variation on ordinary [`LDA`](@ref) suitable for high dimensional data, as it avoids storing scatter matrices. For details, refer to the [MultivariateStats.jl documentation](https://juliastats.org/MultivariateStats.jl/stable/).\n\nIn addition to dimension reduction (using `transform`), probabilistic classification is provided (using `predict`). In the case of classification, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation from the centroid of each target class are computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. 
Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n * `outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool)\n\n`class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. 
Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```\nusing MLJ\n\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = SubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "COFDetector" -":target_in_fit" = "`false`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "SubspaceLDA" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[OutlierDetectionPython.CBLOFDetector] +[MLJMultivariateStatsInterface.BayesianLDA] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"Real\", \"Real\", \"Bool\", \"Union{Nothing, Integer}\", \"Integer\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" +":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = 
"`MLJModelInterface.Probabilistic`" ":package_license" = "MIT" -":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.CBLOFDetector" -":hyperparameters" = "`(:n_clusters, :alpha, :beta, :use_weights, :random_state, :n_jobs)`" -":is_pure_julia" = "`false`" -":human_name" = "cblof detector" -":is_supervised" = "`false`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJMultivariateStatsInterface.BayesianLDA" +":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :priors)`" +":is_pure_julia" = "`true`" +":human_name" = "Bayesian LDA model" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nCBLOFDetector(n_clusters = 8,\n alpha = 0.9,\n beta = 5,\n use_weights = false,\n random_state = nothing,\n n_jobs = 1)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.cblof)\n""" +":docstring" = """```\nBayesianLDA\n```\n\nA model type for constructing a Bayesian LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianLDA(method=...)`.\n\nThe Bayesian multiclass LDA algorithm learns a projection matrix as described in ordinary [`LDA`](@ref). Predicted class posterior probability distributions are derived by applying Bayes' rule with a multivariate Gaussian class-conditional distribution. A prior class distribution can be specified by the user or inferred from training data class frequency.\n\nSee also the [package documentation](https://multivariatestatsjl.readthedocs.io/en/latest/lda.html). For more information about the algorithm, see [Li, Zhu and Ogihara (2006): Using Discriminant Analysis for Multi-class Classification: An Experimental Investigation](https://doi.org/10.1007/s10115-006-0013-y).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: choice of solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e., dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. 
This can be useful if using the standard covariance estimator.\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e., the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. 
(See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "CBLOFDetector" -":target_in_fit" = "`false`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "BayesianLDA" +":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" ":constructor" = "`nothing`" -[OutlierDetectionPython.LOCIDetector] +[MLJMultivariateStatsInterface.PCA] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Real\", \"Real\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing)`" +":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Float64\", \"Union{Nothing, Real, Vector{Float64}}\")`" +":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.LOCIDetector" 
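Both LDA entries above describe the same prediction rule: distances of a projected observation from each class centroid are negated and passed through a softmax. Below is a standalone sketch of that rule; the name `class_probabilities` is ours, and squared Euclidean distance matches the default `dist` hyper-parameter of `SubspaceLDA`:

```julia
# Sketch of the class-probability rule described in the LDA docstrings above.
# `class_probabilities` is an illustrative name, not a package API.
function class_probabilities(z::AbstractVector, centroids::AbstractMatrix)
    # `centroids` holds one column per class, in the transformed space.
    s = [-sum(abs2, z - centroids[:, j]) for j in 1:size(centroids, 2)]
    e = exp.(s .- maximum(s))  # numerically stable softmax
    e ./ sum(e)
end

class_probabilities([0.2, 0.1], [0.0 1.0; 0.0 1.0])  # nearer centroid gets more mass
```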
-":hyperparameters" = "`(:alpha, :k)`" -":is_pure_julia" = "`false`" -":human_name" = "loci detector" +":load_path" = "MLJMultivariateStatsInterface.PCA" +":hyperparameters" = "`(:maxoutdim, :method, :variance_ratio, :mean)`" +":is_pure_julia" = "`true`" +":human_name" = "pca" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLOCIDetector(alpha = 0.5,\n k = 3)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.loci)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "LOCIDetector" +":docstring" = """```\nPCA\n```\n\nA model type for constructing a pca, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPCA = @load PCA pkg=MultivariateStats\n```\n\nDo `model = PCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PCA(maxoutdim=...)`.\n\nPrincipal component analysis learns a linear projection onto a lower dimensional space while preserving most of the initial variance seen in the training data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Together with `variance_ratio`, controls the output dimension `outdim` chosen by the model. Specifically, suppose that `k` is the smallest integer such that retaining the `k` most significant principal components accounts for `variance_ratio` of the total variance in the training data. Then `outdim = min(outdim, maxoutdim)`. If `maxoutdim=0` (default) then the effective `maxoutdim` is `min(n, indim - 1)` where `n` is the number of observations and `indim` the number of features in the training data.\n * `variance_ratio::Float64=0.99`: The ratio of variance preserved after the transformation\n * `method=:auto`: The method to use to solve the problem. Choices are\n\n * `:svd`: Support Vector Decomposition of the matrix.\n * `:cov`: Covariance matrix decomposition.\n * `:auto`: Use `:cov` if the matrices first dimension is smaller than its second dimension and otherwise use `:svd`\n * `mean=nothing`: if `nothing`, centering will be computed and applied, if set to `0` no centering (data is assumed pre-centered); if a vector is passed, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim = min(n, indim, maxoutdim)` is the output dimension; here `n` is the number of observations.\n * `tprincipalvar`: Total variance of the principal components.\n * `tresidualvar`: Total residual variance.\n * `tvar`: Total observation variance (principal + residual variance).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `principalvars`: The variance of the principal components. An AbstractVector of length `outdim`\n * `loadings`: The model's loadings, weights for each variable used when calculating principal components. A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nPCA = @load PCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" +":package_name" = "MultivariateStats" +":name" = "PCA" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[OutlierDetectionPython.LMDDDetector] +[MLJTransforms.Standardizer] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Integer\", \"String\", \"Union{Nothing, Integer}\")`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Union{Function, AbstractVector{Symbol}}\", \"Bool\", \"Bool\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = 
"`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":output_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.LMDDDetector" -":hyperparameters" = "`(:n_iter, :dis_measure, :random_state)`" -":is_pure_julia" = "`false`" -":human_name" = "lmdd detector" +":load_path" = "MLJTransforms.Standardizer" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :count)`" +":is_pure_julia" = "`true`" +":human_name" = "standardizer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLMDDDetector(n_iter = 50,\n dis_measure = \"aad\",\n random_state = nothing)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.lmdd)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "LMDDDetector" +":docstring" = """```\nStandardizer\n```\n\nA model type for constructing a standardizer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStandardizer = @load Standardizer pkg=unknown\n```\n\nDo `model = Standardizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `Standardizer(features=...)`.\n\nUse this model to standardize (whiten) a `Continuous` vector, or relevant columns of a table. The rescalings applied by this transformer to new data are always those learned during the training phase, which are generally different from what would actually standardize the new data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table or any abstract vector with `Continuous` element scitype (any abstract float vector). Only features in a table with `Continuous` scitype can be standardized; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: one of the following, with the behavior indicated below:\n\n * `[]` (empty, the default): standardize all features (columns) having `Continuous` element scitype\n * non-empty vector of feature names (symbols): standardize only the `Continuous` features in the vector (if `ignore=false`) or `Continuous` features *not* named in the vector (`ignore=true`).\n * function or other callable: standardize a feature if the callable returns `true` on its name. 
For example, `Standardizer(features = name -> name in [:x1, :x3], ignore = true, count=true)` has the same effect as `Standardizer(features = [:x1, :x3], ignore = true, count=true)`, namely to standardize all `Continuous` and `Count` features, with the exception of `:x1` and `:x3`.\n\n Note this behavior is further modified if the `ordered_factor` or `count` flags are set to `true`; see below\n * `ignore=false`: whether to ignore or standardize specified `features`, as explained above\n * `ordered_factor=false`: if `true`, standardize any `OrderedFactor` feature wherever a `Continuous` feature would be standardized, as described above\n * `count=false`: if `true`, standardize any `Count` feature wherever a `Continuous` feature would be standardized, as described above\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with relevant features standardized according to the rescalings learned during fitting of `mach`.\n * `inverse_transform(mach, Z)`: apply the inverse transformation to `Z`, so that `inverse_transform(mach, transform(mach, Xnew))` is approximately the same as `Xnew`; unavailable if `ordered_factor` or `count` flags were set to `true`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_fit` - the names of features that will be standardized\n * `means` - the corresponding untransformed mean values\n * `stds` - the corresponding untransformed standard deviations\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_fit`: the names of features that will be standardized\n\n# Examples\n\n```\nusing MLJ\n\nX = (ordinal1 = [1, 2, 3],\n     ordinal2 = coerce([:x, :y, :x], OrderedFactor),\n     ordinal3 = [10.0, 20.0, 30.0],\n     ordinal4 = [-20.0, -30.0, -40.0],\n     nominal = coerce([\"Your father\", \"he\", \"is\"], Multiclass));\n\njulia> schema(X)\n┌──────────┬──────────────────┐\n│ names    │ scitypes         │\n├──────────┼──────────────────┤\n│ ordinal1 │ Count            │\n│ ordinal2 │ OrderedFactor{2} │\n│ ordinal3 │ Continuous       │\n│ ordinal4 │ Continuous       │\n│ nominal  │ Multiclass{3}    │\n└──────────┴──────────────────┘\n\nstand1 = Standardizer();\n\njulia> transform(fit!(machine(stand1, X)), X)\n(ordinal1 = [1, 2, 3],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [-1.0, 0.0, 1.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n\nstand2 = Standardizer(features=[:ordinal3, ], ignore=true, count=true);\n\njulia> transform(fit!(machine(stand2, X)), X)\n(ordinal1 = [-1.0, 0.0, 1.0],\n ordinal2 = CategoricalValue{Symbol,UInt32}[:x, :y, :x],\n ordinal3 = [10.0, 20.0, 30.0],\n ordinal4 = [1.0, 0.0, -1.0],\n nominal = CategoricalValue{String,UInt32}[\"Your father\", \"he\", \"is\"],)\n```\n\nSee also [`OneHotEncoder`](@ref), [`ContinuousEncoder`](@ref).\n"""
+":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl"
+":package_name" = "MLJTransforms"
+":name" = "Standardizer"
":target_in_fit" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"]
+":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"]
":deep_properties" = "`()`"
":predict_scitype" = "`ScientificTypesBase.Unknown`"
-":target_scitype" = 
"`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`Union{ScientificTypesBase.Table, AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -[OutlierDetectionPython.RODDetector] +[MLJTransforms.UnivariateTimeTypeToContinuous] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Bool\",)`" -":package_uuid" = "2449c660-d36c-460e-a68b-92ab3c865b3e" -":hyperparameter_ranges" = "`(nothing,)`" +":hyperparameter_types" = "`(\"Union{Nothing, Dates.TimeType}\", \"Dates.Period\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}}, Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.ScientificTimeType}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "OutlierDetectionPython.RODDetector" -":hyperparameters" = "`(:parallel_execution,)`" -":is_pure_julia" = "`false`" -":human_name" = "rod detector" +":load_path" = "MLJTransforms.UnivariateTimeTypeToContinuous" +":hyperparameters" = "`(:zero_time, :step)`" +":is_pure_julia" = "`true`" +":human_name" = "single variable transformer that creates continuous representations of temporally typed data" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nRODDetector(parallel_execution = false)\n```\n\n[https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod](https://pyod.readthedocs.io/en/latest/pyod.models.html#module-pyod.models.rod)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/OutlierDetectionJL/OutlierDetectionPython.jl" -":package_name" = "OutlierDetectionPython" -":name" = "RODDetector" +":docstring" = """```\nUnivariateTimeTypeToContinuous\n```\n\nA model type for constructing a single variable transformer that creates continuous representations of temporally typed data, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateTimeTypeToContinuous = @load UnivariateTimeTypeToContinuous pkg=unknown\n```\n\nDo `model = UnivariateTimeTypeToContinuous()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateTimeTypeToContinuous(zero_time=...)`.\n\nUse this model to convert vectors with a `TimeType` element type to vectors of `Float64` type (`Continuous` element scitype).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector whose element type is a subtype of `Dates.TimeType`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `zero_time`: the time that is to correspond to 0.0 under transformations, with the type coinciding with the training data element type. If unspecified, the earliest time encountered in training is used.\n * `step::Period=Hour(24)`: time interval to correspond to one unit under transformation\n\n# Operations\n\n * `transform(mach, xnew)`: apply the encoding inferred when `mach` was fit\n\n# Fitted parameters\n\n`fitted_params(mach).fitresult` is the tuple `(zero_time, step)` actually used in transformations, which may differ from the user-specified hyper-parameters.\n\n# Example\n\n```\nusing MLJ\nusing Dates\n\nx = [Date(2001, 1, 1) + Day(i) for i in 0:4]\n\nencoder = UnivariateTimeTypeToContinuous(zero_time=Date(2000, 1, 1),\n step=Week(1))\n\nmach = machine(encoder, x)\nfit!(mach)\njulia> transform(mach, x)\n5-element Vector{Float64}:\n 52.285714285714285\n 52.42857142857143\n 52.57142857142857\n 52.714285714285715\n 52.857142\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateTimeTypeToContinuous" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":reformat", ":selectrows", ":fit", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:Union{Missing, ScientificTypesBase.OrderedFactor{2}}}`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.ScientificTimeType}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":constructor" = "`nothing`" -[SelfOrganizingMaps.SelfOrganizingMap] +[MLJTransforms.OneHotEncoder] ":is_wrapper" = "`false`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Symbol\", \"Symbol\", \"Symbol\", \"Symbol\", \"Distances.PreMetric\", \"Int64\")`" -":package_uuid" = "ba4b7379-301a-4be0-bee6-171e4e152787" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Bool\", \"Bool\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = 
"`Tuple{Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "SelfOrganizingMaps.SelfOrganizingMap" -":hyperparameters" = "`(:k, :η, :σ², :grid_type, :η_decay, :σ_decay, :neighbor_function, :matching_distance, :Nepochs)`" +":load_path" = "MLJTransforms.OneHotEncoder" +":hyperparameters" = "`(:features, :drop_last, :ordered_factor, :ignore)`" ":is_pure_julia" = "`true`" -":human_name" = "self organizing map" +":human_name" = "one-hot encoder" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nSelfOrganizingMap\n```\n\nA model type for constructing a self organizing map, based on [SelfOrganizingMaps.jl](https://github.com/john-waczak/SelfOrganizingMaps.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSelfOrganizingMap = @load SelfOrganizingMap pkg=SelfOrganizingMaps\n```\n\nDo `model = SelfOrganizingMap()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SelfOrganizingMap(k=...)`.\n\nSelfOrganizingMaps implements [Kohonen's Self Organizing Map](https://ieeexplore.ieee.org/abstract/document/58325?casa_token=pGue0TD38nAAAAAA:kWFkvMJQKgYOTJjJx-_bRx8n_tnWEpau2QeoJ1gJt0IsywAuvkXYc0o5ezdc2mXfCzoEZUQXSQ), Proceedings of the IEEE; Kohonen, T.; (1990):\"The self-organizing map\"\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X) where\n\n * `X`: an `AbstractMatrix` or `Table` of input features whose columns are of scitype `Continuous.`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `k=10`: Number of nodes along once side of SOM grid. There are `k²` total nodes.\n * `η=0.5`: Learning rate. Scales adjust made to winning node and its neighbors during each round of training.\n * `σ²=0.05`: The (squared) neighbor radius. Used to determine scale for neighbor node adjustments.\n * `grid_type=:rectangular` Node grid geometry. One of `(:rectangular, :hexagonal, :spherical)`.\n * `η_decay=:exponential` Learning rate schedule function. One of `(:exponential, :asymptotic)`\n * `σ_decay=:exponential` Neighbor radius schedule function. One of `(:exponential, :asymptotic, :none)`\n * `neighbor_function=:gaussian` Kernel function used to make adjustment to neighbor weights. Scale is set by `σ²`. One of `(:gaussian, :mexican_hat)`.\n * `matching_distance=euclidean` Distance function from `Distances.jl` used to determine winning node.\n * `Nepochs=1` Number of times to repeat training on the shuffled dataset.\n\n# Operations\n\n * `transform(mach, Xnew)`: returns the coordinates of the winning SOM node for each instance of `Xnew`. For SOM of grid*type `:rectangular` and `:hexagonal`, these are cartesian coordinates. 
For grid_type `:spherical`, these are the latitude and longitude in radians.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coords`: The coordinates of each of the SOM nodes (points in the domain of the map) with shape (k², 2)\n * `weights`: Array of weight vectors for the SOM nodes (corresponding points in the map's range) of shape (k², input dimension)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `classes`: the index of the winning node for each instance of the training data X interpreted as a class label\n\n# Examples\n\n```\nusing MLJ\nsom = @load SelfOrganizingMap pkg=SelfOrganizingMaps\nmodel = som()\nX, y = make_regression(50, 3) # synthetic data\nmach = machine(model, X) |> fit!\nX̃ = transform(mach, X)\n\nrpt = report(mach)\nclasses = rpt.classes\n```\n"""
-":inverse_transform_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`"
-":package_url" = "https://github.com/john-waczak/SelfOrganizingMaps.jl"
-":package_name" = "SelfOrganizingMaps"
-":name" = "SelfOrganizingMap"
+":docstring" = """```\nOneHotEncoder\n```\n\nA model type for constructing a one-hot encoder, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneHotEncoder = @load OneHotEncoder pkg=unknown\n```\n\nDo `model = OneHotEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneHotEncoder(features=...)`.\n\nUse this model to one-hot encode the `Multiclass` and `OrderedFactor` features (columns) of some table, leaving other columns unchanged.\n\nNew data to be transformed may lack features present in the fit data, but no *new* features can be present.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo ensure *all* features are transformed into `Continuous` features, or dropped, use [`ContinuousEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Columns can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of symbols (feature names). If empty (default) then all `Multiclass` and `OrderedFactor` features are encoded. Otherwise, encoding is further restricted to the specified features (`ignore=false`) or the unspecified features (`ignore=true`). This default behavior can be modified by the `ordered_factor` flag.\n * `ordered_factor=false`: when `true`, `OrderedFactor` features are universally excluded\n * `drop_last=true`: whether to drop the column corresponding to the final class of encoded features. 
For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `all_features`: names of all features encountered in training\n * `fitted_levels_given_feature`: dictionary of the levels associated with each feature encoded, keyed on the feature name\n * `ref_name_pairs_given_feature`: dictionary of pairs `r => ftr` (such as `0x00000001 => :grad__A`) where `r` is a CategoricalArrays.jl reference integer representing a level, and `ftr` the corresponding new feature name; the dictionary is keyed on the names of features that are encoded\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `features_to_be_encoded`: names of input features to be encoded\n * `new_features`: names of all output features\n\n# Example\n\n```\nusing MLJ\n\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n     grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n     height=[1.85, 1.67, 1.5, 1.67],\n     n_devices=[3, 2, 4, 3])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names     │ scitypes         │\n├───────────┼──────────────────┤\n│ name      │ Multiclass{4}    │\n│ grade     │ OrderedFactor{3} │\n│ height    │ Continuous       │\n│ n_devices │ Count            │\n└───────────┴──────────────────┘\n\nhot = OneHotEncoder(drop_last=true)\nmach = fit!(machine(hot, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names        │ scitypes   │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John   │ Continuous │\n│ name__Lee    │ Continuous │\n│ grade__A     │ Continuous │\n│ grade__B     │ Continuous │\n│ height       │ Continuous │\n│ n_devices    │ Count      │\n└──────────────┴────────────┘\n```\n\nSee also [`ContinuousEncoder`](@ref).\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Table`"
+":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl"
+":package_name" = "MLJTransforms"
+":name" = "OneHotEncoder"
":target_in_fit" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"]
+":implemented_methods" = [":fit", ":fitted_params", ":transform", ":OneHotEncoder"]
":deep_properties" = "`()`"
":predict_scitype" = "`ScientificTypesBase.Unknown`"
":target_scitype" = "`ScientificTypesBase.Unknown`"
":supports_training_losses" = "`false`"
":supports_weights" = "`false`"
":reports_feature_importances" = "`false`"
-":input_scitype" = "`Union{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractMatrix{ScientificTypesBase.Continuous}}`"
-":transform_scitype" = "`AbstractMatrix{ScientificTypesBase.Continuous}`"
+":input_scitype" = "`ScientificTypesBase.Table`"
+":transform_scitype" = "`ScientificTypesBase.Table`"
":constructor" = "`nothing`"

-[InteractiveUtils]
-
-[MLJMultivariateStatsInterface.LDA]
-":constructor" = "`nothing`"
-":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Distances.SemiMetric\")`"
-":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`"
+[MLJTransforms.ContinuousEncoder]
+":is_wrapper" = "`false`"
+":hyperparameter_types" = "`(\"Bool\", \"Bool\")`"
+":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
+":hyperparameter_ranges" = "`(nothing, nothing)`"
":reporting_operations" = "`()`"
-":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" ":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJMultivariateStatsInterface.LDA" -":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :dist)`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.ContinuousEncoder" +":hyperparameters" = "`(:drop_last, :one_hot_ordered_factors)`" ":is_pure_julia" = "`true`" -":human_name" = "linear discriminant analysis model" -":is_supervised" = "`true`" +":human_name" = "continuous encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLDA\n```\n\nA model type for constructing a linear discriminant analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLDA = @load LDA pkg=MultivariateStats\n```\n\nDo `model = LDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LDA(method=...)`.\n\n[Multiclass linear discriminant analysis](https://en.wikipedia.org/wiki/Linear_discriminant_analysis) learns a projection in a space of features to a lower dimensional space, in a way that attempts to preserve as much as possible the degree to which the classes of a discrete target variable can be discriminated. This can be used either for dimension reduction of the features (see `transform` below) or for probabilistic classification of the target (see `predict` below).\n\nIn the case of prediction, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: The solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). 
Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nLDA = @load LDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = LDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n\n```\n\nSee also [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "LDA" -":target_in_fit" = "`true`" +":docstring" = """```\nContinuousEncoder\n```\n\nA model type for constructing a continuous encoder, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContinuousEncoder = @load ContinuousEncoder pkg=unknown\n```\n\nDo `model = ContinuousEncoder()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ContinuousEncoder(drop_last=...)`.\n\nUse this model to arrange all features (columns) of a table to have `Continuous` element scitype, by applying the following protocol to each feature `ftr`:\n\n * If `ftr` is already `Continuous` retain it.\n * If `ftr` is `Multiclass`, one-hot encode it.\n * If `ftr` is `OrderedFactor`, replace it with `coerce(ftr, Continuous)` (vector of floating point integers), unless `one_hot_ordered_factors=true` is specified, in which case one-hot encode it.\n * If `ftr` is `Count`, replace it with `coerce(ftr, Continuous)`.\n * If `ftr` has some other element scitype, or was not observed in fitting the encoder, drop it from the table.\n\n**Warning:** This transformer assumes that `levels(col)` for any `Multiclass` or `OrderedFactor` column, `col`, is the same for training data and new data to be transformed.\n\nTo selectively one-hot-encode categorical features (without dropping features) use [`OneHotEncoder`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any Tables.jl compatible table. Features can be of mixed type but only those with element scitype `Multiclass` or `OrderedFactor` can be encoded. Check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `drop_last=true`: whether to drop the column corresponding to the final class of one-hot encoded features. For example, a three-class feature is spawned into three new features if `drop_last=false`, but just two features otherwise.\n * `one_hot_ordered_factors=false`: whether to one-hot any feature with `OrderedFactor` element scitype, or to instead coerce it directly to a (single) `Continuous` feature using the order\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_to_keep`: names of features that will not be dropped from the table\n * `one_hot_encoder`: the `OneHotEncoder` model instance for handling the one-hot encoding\n * `one_hot_encoder_fitresult`: the fitted parameters of the `OneHotEncoder` model\n\n# Report\n\n * `features_to_keep`: names of input features that will not be dropped from the table\n * `new_features`: names of all output features\n\n# Example\n\n```julia\nX = (name=categorical([\"Danesh\", \"Lee\", \"Mary\", \"John\"]),\n     grade=categorical([\"A\", \"B\", \"A\", \"C\"], ordered=true),\n     height=[1.85, 1.67, 1.5, 1.67],\n     n_devices=[3, 2, 4, 3],\n     comments=[\"the force\", \"be\", \"with you\", \"too\"])\n\njulia> schema(X)\n┌───────────┬──────────────────┐\n│ names     │ scitypes         │\n├───────────┼──────────────────┤\n│ name      │ Multiclass{4}    │\n│ grade     │ OrderedFactor{3} │\n│ height    │ Continuous       │\n│ n_devices │ Count            │\n│ comments  │ Textual          │\n└───────────┴──────────────────┘\n\nencoder = ContinuousEncoder(drop_last=true)\nmach = fit!(machine(encoder, X))\nW = transform(mach, X)\n\njulia> schema(W)\n┌──────────────┬────────────┐\n│ names        │ scitypes   │\n├──────────────┼────────────┤\n│ name__Danesh │ Continuous │\n│ name__John   │ Continuous │\n│ name__Lee    │ Continuous │\n│ grade        │ Continuous │\n│ height       │ Continuous │\n│ n_devices    │ Continuous │\n└──────────────┴────────────┘\n\njulia> setdiff(schema(X).names, report(mach).features_to_keep) # dropped features\n1-element Vector{Symbol}:\n :comments\n\n```\n\nSee also [`OneHotEncoder`](@ref)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Table`"
+":package_url" = 
"https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "ContinuousEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":ContinuousEncoder"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.MultitargetLinearRegressor] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Bool\",)`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing,)`" + +[MLJTransforms.FrequencyEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Bool\", \"Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJMultivariateStatsInterface.MultitargetLinearRegressor" -":hyperparameters" = "`(:bias,)`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.FrequencyEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :normalize, :output_type)`" ":is_pure_julia" = "`true`" -":human_name" = "multitarget linear regressor" -":is_supervised" = "`true`" +":human_name" = "frequency encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nMultitargetLinearRegressor\n```\n\nA model type for constructing a multitarget linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetLinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetLinearRegressor(bias=...)`.\n\n`MultitargetLinearRegressor` assumes the target variable is vector-valued with continuous components. It trains a linear prediction function using the least squares algorithm. 
Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nLinearRegressor = @load MultitargetLinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 9; n_targets = 2) # a table and a table (synthetic data)\n\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 9)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n"""
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl"
-":package_name" = "MultivariateStats"
-":name" = "MultitargetLinearRegressor"
-":target_in_fit" = "`true`"
+":docstring" = """```\nFrequencyEncoder\n```\n\nA model type for constructing a frequency encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFrequencyEncoder = @load FrequencyEncoder pkg=MLJTransforms\n```\n\nDo `model = FrequencyEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FrequencyEncoder(features=...)`.\n\n`FrequencyEncoder` implements frequency encoding which replaces the categorical values in the specified categorical features with their (normalized or raw) frequencies of occurrence in the dataset. \n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns `true` for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or ignore them\n * `normalize=false`: Whether to use normalized frequencies that sum to 1 over category values or to use raw counts.\n * `output_type=Float32`: The type of the output values. 
The default is `Float32`, but you can set it to `Float64` or any other type that can hold the frequency values.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply frequency encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `statistic_given_feat_val`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = FrequencyEncoder(ordered_factor = false, normalize=true)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n (A = [2, 1, 2, 2, 2],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [4, 4, 4, 1, 4],\n D = [3, 2, 3, 2, 3],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n"""
+":inverse_transform_scitype" = "`ScientificTypesBase.Table`"
+":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl"
+":package_name" = "MLJTransforms"
+":name" = "FrequencyEncoder"
+":target_in_fit" = "`false`"
":supports_class_weights" = "`false`"
":supports_online" = "`false`"
-":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"]
+":implemented_methods" = [":fit", ":fitted_params", ":transform"]
":deep_properties" = "`()`"
-":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
+":predict_scitype" = "`ScientificTypesBase.Unknown`"
+":target_scitype" = "`ScientificTypesBase.Unknown`"
":supports_training_losses" = "`false`"
":supports_weights" = "`false`"
":reports_feature_importances" = "`false`"
-":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`"
-":transform_scitype" = "`ScientificTypesBase.Unknown`"
-":is_wrapper" = "`false`"

-[MLJMultivariateStatsInterface.BayesianSubspaceLDA]
+":input_scitype" = "`ScientificTypesBase.Table`"
+":transform_scitype" = "`ScientificTypesBase.Table`"
":constructor" = "`nothing`"
-":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`"
-":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411"
-":hyperparameter_ranges" = "`(nothing, nothing, nothing)`"
+
+[MLJTransforms.TargetEncoder]
+":is_wrapper" = "`false`"
+":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Real\")`"
+":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6"
+":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`"
":reporting_operations" = "`()`"
-":fit_data_scitype" = 
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table, ScientificTypesBase.Unknown}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJMultivariateStatsInterface.BayesianSubspaceLDA" -":hyperparameters" = "`(:normalize, :outdim, :priors)`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.TargetEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :lambda, :m)`" ":is_pure_julia" = "`true`" -":human_name" = "Bayesian subspace LDA model" -":is_supervised" = "`true`" +":human_name" = "target encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nBayesianSubspaceLDA\n```\n\nA model type for constructing a Bayesian subspace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianSubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianSubspaceLDA(normalize=...)`.\n\nThe Bayesian multiclass subspace linear discriminant analysis algorithm learns a projection matrix as described in [`SubspaceLDA`](@ref). The posterior class probability distribution is derived as in [`BayesianLDA`](@ref).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n\n`outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. 
Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space, i.e., the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The overall mean of the training data.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```\nusing MLJ\n\nBayesianSubspaceLDA = @load BayesianSubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianSubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`SubspaceLDA`](@ref)\n"""
-":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`"
-":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl"
-":package_name" = "MultivariateStats"
-":name" = "BayesianSubspaceLDA"
+":docstring" = """```\nTargetEncoder\n```\n\nA model type for constructing a target encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nTargetEncoder = @load TargetEncoder pkg=MLJTransforms\n```\n\nDo `model = TargetEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `TargetEncoder(features=...)`.\n\n`TargetEncoder` implements target encoding as defined in [1] to encode categorical variables into continuous ones using statistics from the target variable.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. 
Use `schema(X)` to check scitypes.\n\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous` or `Count` for regression problems and `Multiclass` or `OrderedFactor` for classification problems; check the scitype with `schema(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns `true` for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or ignore them\n * `lambda`: Shrinkage hyperparameter used to mix between posterior and prior statistics as described in [1]\n * `m`: An integer hyperparameter to compute shrinkage as described in [1]. If `m=:auto` then `m` will be computed using empirical Bayes estimation as described in [1].\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply target encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `task`: Whether the task is `Classification` or `Regression`\n * `y_statistic_given_feat_level`: A dictionary with the necessary statistics to encode each categorical feature. It maps each level in each categorical feature to a statistic computed over the target.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",] \nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",] \nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Define the target variable \ny = [\"c1\", \"c2\", \"c3\", \"c1\", \"c2\",]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass and B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\ny = coerce(y, Multiclass)\n\nencoder = TargetEncoder(ordered_factor = false, lambda = 1.0, m = 0,)\nmach = fit!(machine(encoder, X, y))\nXnew = transform(mach, X)\n\njulia> schema(Xnew)\n┌───────┬──────────────────┬─────────────────────────────────┐\n│ names │ scitypes         │ types                           │\n├───────┼──────────────────┼─────────────────────────────────┤\n│ A_1   │ Continuous       │ Float64                         │\n│ A_2   │ Continuous       │ Float64                         │\n│ A_3   │ Continuous       │ Float64                         │\n│ B     │ Continuous       │ Float64                         │\n│ C_1   │ Continuous       │ Float64                         │\n│ C_2   │ Continuous       │ Float64                         │\n│ C_3   │ Continuous       │ Float64                         │\n│ D_1   │ Continuous       │ Float64                         │\n│ D_2   │ Continuous       │ Float64                         │\n│ D_3   │ Continuous       │ Float64                         │\n│ E     │ OrderedFactor{5} │ CategoricalValue{Int64, UInt32} │\n└───────┴──────────────────┴─────────────────────────────────┘\n```\n\n# Reference\n\n[1] Micci-Barreca, Daniele. “A preprocessing scheme for high-cardinality categorical attributes in classification and prediction problems” SIGKDD Explor. Newsl. 
3, 1 (July 2001), 27–32.\n\nSee also [`OneHotEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "TargetEncoder" ":target_in_fit" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.FactorAnalysis] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Symbol\", \"Int64\", \"Int64\", \"Real\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.UnivariateBoxCoxTransformer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{AbstractVector{ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.FactorAnalysis" -":hyperparameters" = "`(:method, :maxoutdim, :maxiter, :tol, :eta, :mean)`" +":load_path" = "MLJTransforms.UnivariateBoxCoxTransformer" +":hyperparameters" = "`(:n, :shift)`" ":is_pure_julia" = "`true`" -":human_name" = "factor analysis model" +":human_name" = "single variable Box-Cox transformer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nFactorAnalysis\n```\n\nA model type for constructing a factor analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n```\n\nDo `model = FactorAnalysis()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FactorAnalysis(method=...)`.\n\nFactor analysis is a linear-Gaussian latent variable model that is closely related to probabilistic PCA. 
In contrast to the probabilistic PCA model, the covariance of conditional distribution of the observed variable given the latent variable is diagonal rather than isotropic.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:cm`: Method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `maxiter::Int=1000`: Maximum number of iterations.\n * `tol::Real=1e-6`: Convergence tolerance.\n * `eta::Real=tol`: Variance lower bound.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively. Each column of the projection matrix corresponds to a factor.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data (number of factors).\n * `variance`: The variance of the factors.\n * `covariance_matrix`: The estimated covariance matrix.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `loadings`: The factor loadings. 
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nFactorAnalysis = @load FactorAnalysis pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = FactorAnalysis(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`PPCA`](@ref), [`PCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "FactorAnalysis" +":docstring" = """```\nUnivariateBoxCoxTransformer\n```\n\nA model type for constructing a single variable Box-Cox transformer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateBoxCoxTransformer = @load UnivariateBoxCoxTransformer pkg=unknown\n```\n\nDo `model = UnivariateBoxCoxTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateBoxCoxTransformer(n=...)`.\n\nBox-Cox transformations attempt to make data look more normally distributed. This can improve performance and assist in the interpretation of models which suppose that data is generated by a normal distribution.\n\nA Box-Cox transformation (with shift) is of the form\n\n```\nx -> ((x + c)^λ - 1)/λ\n```\n\nfor some constant `c` and real `λ`, unless `λ = 0`, in which case the above is replaced with\n\n```\nx -> log(x + c)\n```\n\nGiven user-specified hyper-parameters `n::Integer` and `shift::Bool`, the present implementation learns the parameters `c` and `λ` from the training data as follows: If `shift=true` and zeros are encountered in the data, then `c` is set to `0.2` times the data mean. If there are no zeros, then no shift is applied. 
Finally, `n` different values of `λ` between `-0.4` and `3` are considered, with `λ` fixed to the value maximizing normality of the transformed data.\n\n*Reference:* [Wikipedia entry for power transform](https://en.wikipedia.org/wiki/Power_transform).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Continuous`; check the scitype with `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n=171`: number of values of the exponent `λ` to try\n * `shift=false`: whether to include a preliminary constant translation in transformations, in the presence of zeros\n\n# Operations\n\n * `transform(mach, xnew)`: apply the Box-Cox transformation learned when fitting `mach`\n * `inverse_transform(mach, z)`: reconstruct the vector `x` whose transformation learned by `mach` is `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `λ`: the learned Box-Cox exponent\n * `c`: the learned shift\n\n# Examples\n\n```\nusing MLJ\nusing UnicodePlots\nusing Random\nRandom.seed!(123)\n\ntransf = UnivariateBoxCoxTransformer()\n\nx = randn(1000).^2\n\nmach = machine(transf, x)\nfit!(mach)\n\nz = transform(mach, x)\n\njulia> histogram(x)\n ┌ ┐\n [ 0.0, 2.0) ┤███████████████████████████████████ 848\n [ 2.0, 4.0) ┤████▌ 109\n [ 4.0, 6.0) ┤█▍ 33\n [ 6.0, 8.0) ┤▍ 7\n [ 8.0, 10.0) ┤▏ 2\n [10.0, 12.0) ┤ 0\n [12.0, 14.0) ┤▏ 1\n └ ┘\n Frequency\n\njulia> histogram(z)\n ┌ ┐\n [-5.0, -4.0) ┤█▎ 8\n [-4.0, -3.0) ┤████████▊ 64\n [-3.0, -2.0) ┤█████████████████████▊ 159\n [-2.0, -1.0) ┤█████████████████████████████▊ 216\n [-1.0, 0.0) ┤███████████████████████████████████ 254\n [ 0.0, 1.0) ┤█████████████████████████▊ 188\n [ 1.0, 2.0) ┤████████████▍ 90\n [ 2.0, 3.0) ┤██▊ 20\n [ 3.0, 4.0) ┤▎ 1\n └ ┘\n Frequency\n\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateBoxCoxTransformer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateBoxCoxTransformer"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.LinearRegressor] +":input_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Bool\",)`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing,)`" + +[MLJTransforms.InteractionTransformer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Vector{Symbol}}\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing)`"
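The λ-fitting loop described in the `UnivariateBoxCoxTransformer` docstring above can be sketched as follows. This is a hedged illustration only: the normality score shown (correlation of the sorted transform with reference normal quantiles) is one common choice and is not necessarily what MLJTransforms uses, and `fit_lambda`/`boxcox` are invented names:

```julia
# Sketch of the grid search over λ ∈ [-0.4, 3] described above; assumes x .> 0
# (the no-shift case). A higher score ⇒ transformed data closer to normal.
using Statistics, Distributions

boxcox(λ, x) = λ == 0 ? log.(x) : ((x .^ λ) .- 1) ./ λ

function fit_lambda(x; n = 171)
    grid = range(-0.4, 3, length = n)
    ps = (1:length(x)) ./ (length(x) + 1)
    qs = quantile.(Normal(), ps)              # reference normal quantiles
    score(λ) = cor(sort(boxcox(λ, x)), qs)    # straight Q-Q line ⇒ near-normal
    return grid[argmax(score.(grid))]
end

fit_lambda(randn(1000) .^ 2)  # typically a λ well below 1: squaring right-skews the data
```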
":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.Static`" ":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJMultivariateStatsInterface.LinearRegressor" -":hyperparameters" = "`(:bias,)`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.InteractionTransformer" +":hyperparameters" = "`(:order, :features)`" ":is_pure_julia" = "`true`" -":human_name" = "linear regressor" -":is_supervised" = "`true`" +":human_name" = "interaction transformer" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(bias=...)`.\n\n`LinearRegressor` assumes the target is a `Continuous` variable and trains a linear prediction function using the least squares algorithm. Options exist to specify a bias term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nLinearRegressor = @load LinearRegressor pkg=MultivariateStats\nlinear_regressor = LinearRegressor()\n\nX, y = make_regression(100, 2) # a table and a vector (synthetic data)\nmach = machine(linear_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "LinearRegressor" -":target_in_fit" = "`true`" +":docstring" = """```\nInteractionTransformer\n```\n\nA model type for constructing a interaction transformer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nInteractionTransformer = @load 
InteractionTransformer pkg=unknown\n```\n\nDo `model = InteractionTransformer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `InteractionTransformer(order=...)`.\n\nGenerates all polynomial interaction terms up to the given order for the subset of chosen columns. Any column that contains elements with scitype `<:Infinite` is a valid basis to generate interactions. If `features` is not specified, all such columns with scitype `<:Infinite` in the table are used as a basis.\n\nIn MLJ or MLJBase, you can transform features `X` with the single call\n\n```\ntransform(machine(model), X)\n```\n\nSee also the example below.\n\n# Hyper-parameters\n\n * `order`: Maximum order of interactions to be generated.\n * `features`: Restricts interations generation to those columns\n\n# Operations\n\n * `transform(machine(model), X)`: Generates polynomial interaction terms out of table `X` using the hyper-parameters specified in `model`.\n\n# Example\n\n```\nusing MLJ\n\nX = (\n A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"]\n)\nit = InteractionTransformer(order=3)\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],\n A_C = [7, 16, 27],\n B_C = [28, 40, 54],\n A_B_C = [28, 80, 162],)\n\nit = InteractionTransformer(order=2, features=[:A, :B])\nmach = machine(it)\n\njulia> transform(mach, X)\n(A = [1, 2, 3],\n B = [4, 5, 6],\n C = [7, 8, 9],\n D = [\"x₁\", \"x₂\", \"x₃\"],\n A_B = [4, 10, 18],)\n\n```\n""" +":inverse_transform_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "InteractionTransformer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.ICA] +":input_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Symbol\", \"Bool\", \"Int64\", \"Real\", \"Union{Nothing, Matrix{<:Real}}\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.UnivariateDiscretizer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Int64\",)`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing,)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = 
"`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Continuous}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.ICA" -":hyperparameters" = "`(:outdim, :alg, :fun, :do_whiten, :maxiter, :tol, :winit, :mean)`" -":is_pure_julia" = "`true`" -":human_name" = "independent component analysis model" -":is_supervised" = "`false`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nICA\n```\n\nA model type for constructing a independent component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nICA = @load ICA pkg=MultivariateStats\n```\n\nDo `model = ICA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ICA(outdim=...)`.\n\nIndependent component analysis is a computational technique for separating a multivariate signal into additive subcomponents, with the assumption that the subcomponents are non-Gaussian and independent from each other.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `outdim::Int=0`: The number of independent components to recover, set automatically if `0`.\n * `alg::Symbol=:fastica`: The algorithm to use (only `:fastica` is supported at the moment).\n * `fun::Symbol=:tanh`: The approximate neg-entropy function, one of `:tanh`, `:gaus`.\n * `do_whiten::Bool=true`: Whether or not to perform pre-whitening.\n * `maxiter::Int=100`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance for change in the unmixing matrix W.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: mean to use, if nothing (default) centering is computed and applied, if zero, no centering; otherwise a vector of means can be passed.\n * `winit::Union{Nothing,Matrix{<:Real}}=nothing`: Initial guess for the unmixing matrix `W`: either an empty matrix (for random initialization of `W`), a matrix of size `m × k` (if `do_whiten` is true), or a matrix of size `m × k`. 
Here `m` is the number of components (columns) of the input.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return the component-separated version of input `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: The estimated component matrix.\n * `mean`: The estimated mean vector.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `mean`: The mean of the untransformed training data, of length `indim`.\n\n# Examples\n\n```\nusing MLJ\n\nICA = @load ICA pkg=MultivariateStats\n\ntimes = range(0, 8, length=2000)\n\nsine_wave = sin.(2*times)\nsquare_wave = sign.(sin.(3*times))\nsawtooth_wave = map(t -> mod(2t, 2) - 1, times)\nsignals = hcat(sine_wave, square_wave, sawtooth_wave)\nnoisy_signals = signals + 0.2*randn(size(signals))\n\nmixing_matrix = [ 1 1 1; 0.5 2 1; 1.5 1 2]\nX = MLJ.table(noisy_signals*mixing_matrix)\n\nmodel = ICA(outdim = 3, tol=0.1)\nmach = machine(model, X) |> fit!\n\nX_unmixed = transform(mach, X)\n\nusing Plots\n\nplot(X.x2)\nplot(X.x2)\nplot(X.x3)\n\nplot(X_unmixed.x1)\nplot(X_unmixed.x2)\nplot(X_unmixed.x3)\n\n```\n\nSee also [`PCA`](@ref), [`KernelPCA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "ICA" +":load_path" = "MLJTransforms.UnivariateDiscretizer" +":hyperparameters" = "`(:n_classes,)`" +":is_pure_julia" = "`true`" +":human_name" = "single variable discretizer" +":is_supervised" = "`false`" +":iteration_parameter" = "`nothing`" +":docstring" = """```\nUnivariateDiscretizer\n```\n\nA model type for constructing a single variable discretizer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateDiscretizer = @load UnivariateDiscretizer pkg=unknown\n```\n\nDo `model = UnivariateDiscretizer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateDiscretizer(n_classes=...)`.\n\nDiscretization converts a `Continuous` vector into an `OrderedFactor` vector. In particular, the output is a `CategoricalVector` (whose reference type is optimized).\n\nThe transformation is chosen so that the vector on which the transformer is fit has, in transformed form, an approximately uniform distribution of values. 
Specifically, if `n_classes` is the level of discretization, then `2*n_classes - 1` ordered quantiles are computed, the odd quantiles being used for transforming (discretization) and the even quantiles for inverse transforming.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with `Continuous` element scitype; check scitype with `scitype(x)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `n_classes`: number of discrete classes in the output\n\n# Operations\n\n * `transform(mach, xnew)`: discretize `xnew` according to the discretization learned when fitting `mach`\n * `inverse_transform(mach, z)`: attempt to reconstruct from `z` a vector that transforms to give `z`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach).fitresult` include:\n\n * `odd_quantiles`: quantiles used for transforming (length is `n_classes - 1`)\n * `even_quantiles`: quantiles used for inverse transforming (length is `n_classes`)\n\n# Example\n\n```\nusing MLJ\nusing Random\nRandom.seed!(123)\n\ndiscretizer = UnivariateDiscretizer(n_classes=100)\nmach = machine(discretizer, randn(1000))\nfit!(mach)\n\njulia> x = rand(5)\n5-element Vector{Float64}:\n 0.8585244609846809\n 0.37541692370451396\n 0.6767070590395461\n 0.9208844241267105\n 0.7064611415680901\n\njulia> z = transform(mach, x)\n5-element CategoricalArrays.CategoricalArray{UInt8,1,UInt8}:\n 0x52\n 0x42\n 0x4d\n 0x54\n 0x4e\n\nx_approx = inverse_transform(mach, z)\njulia> x - x_approx\n5-element Vector{Float64}:\n 0.008224506144777322\n 0.012731354778359405\n 0.0056265330571125816\n 0.005738175684445124\n 0.006835652575801987\n```\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateDiscretizer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform", ":UnivariateDiscretizer"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.PPCA] +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`AbstractVector{<:ScientificTypesBase.OrderedFactor}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Int64\", \"Real\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" + +[MLJTransforms.CardinalityReducer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Real\", \"Dict{T} where T<:Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" =
"`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" -":package_license" = "MIT" +":package_license" = "unknown" ":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.PPCA" -":hyperparameters" = "`(:maxoutdim, :method, :maxiter, :tol, :mean)`" +":load_path" = "MLJTransforms.CardinalityReducer" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :min_frequency, :label_for_infrequent)`" ":is_pure_julia" = "`true`" -":human_name" = "probabilistic PCA model" +":human_name" = "cardinality reducer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nPPCA\n```\n\nA model type for constructing a probabilistic PCA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPPCA = @load PPCA pkg=MultivariateStats\n```\n\nDo `model = PPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `PPCA(maxoutdim=...)`.\n\nProbabilistic principal component analysis is a dimension-reduction algorithm which represents a constrained form of the Gaussian distribution in which the number of free parameters can be restricted while still allowing the model to capture the dominant correlations in a data set. It is expressed as the maximum likelihood solution of a probabilistic latent variable model. For details, see Bishop (2006): C. M. Pattern Recognition and Machine Learning.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `method::Symbol=:ml`: The method to use to solve the problem, one of `:ml`, `:em`, `:bayes`.\n * `maxiter::Int=1000`: The maximum number of iterations.\n * `tol::Real=1e-6`: The convergence tolerance.\n * `mean::Union{Nothing, Real, Vector{Float64}}=nothing`: If `nothing`, centering will be computed and applied; if set to `0` no centering is applied (data is assumed pre-centered); if a vector, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively. Each column of the projection matrix corresponds to a principal component.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `tvat`: The variance of the components.\n * `loadings`: The model's loadings matrix. A matrix of size (`indim`, `outdim`) where `indim` and `outdim` as as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nPPCA = @load PPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PPCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "PPCA" +":docstring" = """```\nCardinalityReducer\n```\n\nA model type for constructing a cardinality reducer, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nCardinalityReducer = @load CardinalityReducer pkg=MLJTransforms\n```\n\nDo `model = CardinalityReducer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `CardinalityReducer(features=...)`.\n\n`CardinalityReducer` maps any level of a categorical feature that occurs with frequency < `min_frequency` into a new level (e.g., \"Other\"). This is useful when some categorical features have high cardinality and many levels are infrequent. This assumes that the categorical features have raw types that are in `Union{AbstractString, Char, Number}`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or to ignore them\n * `min_frequency::Real=3`: Any level of a categorical feature that occurs with frequency < `min_frequency` will be mapped to a new level.
It can be an integer, in which case raw counts are used, or a float, in which case normalized frequencies are used.\n * `label_for_infrequent::Dict{<:Type, <:Any} = Dict(AbstractString => \"Other\", Char => 'O')`: A dictionary where the possible keys are the types `Char`, `AbstractString`, and `Number`, and each value signifies the new level to map into, given the column's raw super type. By default, if the raw type of the column subtypes `AbstractString` then the new value is `\"Other\"`; if the raw type subtypes `Char` then the new value is `'O'`; and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply cardinality reduction to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `new_cat_given_col_val`: A dictionary that maps each level in a categorical feature to a new level (either itself or the new level specified in `label_for_infrequent`)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define categorical features\nA = [ [\"a\" for i in 1:100]..., \"b\", \"b\", \"b\", \"c\", \"d\"]\nB = [ [0 for i in 1:100]..., 1, 2, 3, 4, 4]\n\n# Combine into a named tuple\nX = (A = A, B = B)\n\n# Coerce A and B to multiclass\nX = coerce(X,\n:A => Multiclass,\n:B => Multiclass\n)\n\nencoder = CardinalityReducer(ordered_factor = false, min_frequency=3)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> proportionmap(Xnew.A)\nDict{CategoricalArrays.CategoricalValue{String, UInt32}, Float64} with 3 entries:\n \"Other\" => 0.0190476\n \"b\" => 0.0285714\n \"a\" => 0.952381\n\njulia> proportionmap(Xnew.B)\nDict{CategoricalArrays.CategoricalValue{Int64, UInt32}, Float64} with 2 entries:\n 0 => 0.952381\n -1 => 0.047619\n```\n\nSee also [`FrequencyEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "CardinalityReducer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.RidgeRegressor] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`"
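As a rough illustration of the frequency rule the `CardinalityReducer` docstring describes (levels occurring fewer than `min_frequency` times collapse into one fallback level), here is a hedged sketch; `reduce_cardinality` is an invented helper, not the package's transform, which operates on categorical table columns rather than raw vectors:

```julia
# Hedged sketch of the CardinalityReducer rule: map any level occurring fewer
# than `min_frequency` times to a single fallback level. Illustrative only.
using StatsBase  # countmap

function reduce_cardinality(v::AbstractVector{<:AbstractString};
                            min_frequency::Integer = 3, other = "Other")
    counts = countmap(v)                                   # level => raw count
    return [counts[x] < min_frequency ? other : x for x in v]
end

v = [fill("a", 100); "b"; "b"; "b"; "c"; "d"]
reduce_cardinality(v)  # "c" and "d" (one occurrence each) become "Other"
```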
+[MLJTransforms.OrdinalEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{ScientificTypesBase.Continuous}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJMultivariateStatsInterface.RidgeRegressor" -":hyperparameters" = "`(:lambda, :bias)`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.OrdinalEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :output_type)`" ":is_pure_julia" = "`true`" -":human_name" = "ridge regressor" -":is_supervised" = "`true`" +":human_name" = "ordinal encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nRidgeRegressor\n```\n\nA model type for constructing a ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = RidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `RidgeRegressor(lambda=...)`.\n\n`RidgeRegressor` adds a quadratic penalty term to least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. 
If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\n\nRidgeRegressor = @load RidgeRegressor pkg=MultivariateStats\npipe = Standardizer() |> RidgeRegressor(lambda=10)\n\nX, y = @load_boston\n\nmach = machine(pipe, X, y) |> fit!\nyhat = predict(mach, X)\ntraining_error = l1(yhat, y) |> mean\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`MultitargetRidgeRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "RidgeRegressor" -":target_in_fit" = "`true`" +":docstring" = """```\nOrdinalEncoder\n```\n\nA model type for constructing an ordinal encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOrdinalEncoder = @load OrdinalEncoder pkg=MLJTransforms\n```\n\nDo `model = OrdinalEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OrdinalEncoder(features=...)`.\n\n`OrdinalEncoder` implements ordinal encoding which replaces the categorical values in the specified categorical features with integers (ordered arbitrarily). This will create an implicit ordering between categories which may not be a proper modelling assumption.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or to ignore them\n * `output_type`: The numerical concrete type of the encoded features. Default is `Float32`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply ordinal encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table.
Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `index_given_feat_level`: A dictionary that maps each level for each column in a subset of the categorical features of `X` into an integer.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical features\nA = [\"g\", \"b\", \"g\", \"r\", \"r\",]\nB = [1.0, 2.0, 3.0, 4.0, 5.0,]\nC = [\"f\", \"f\", \"f\", \"m\", \"f\",]\nD = [true, false, true, false, true,]\nE = [1, 2, 3, 4, 5,]\n\n# Combine into a named tuple\nX = (A = A, B = B, C = C, D = D, E = E)\n\n# Coerce A, C, D to multiclass, B to continuous and E to ordinal\nX = coerce(X,\n:A => Multiclass,\n:B => Continuous,\n:C => Multiclass,\n:D => Multiclass,\n:E => OrderedFactor,\n)\n\n# Check scitype coercion:\nschema(X)\n\nencoder = OrdinalEncoder(ordered_factor = false)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia> Xnew\n (A = [2, 1, 2, 3, 3],\n B = [1.0, 2.0, 3.0, 4.0, 5.0],\n C = [1, 1, 1, 2, 1],\n D = [2, 1, 2, 1, 2],\n E = CategoricalArrays.CategoricalValue{Int64, UInt32}[1, 2, 3, 4, 5],)\n```\n\nSee also [`TargetEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "OrdinalEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.KernelPCA] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Union{Nothing, Function}\", \"Symbol\", \"Bool\", \"Real\", \"Real\", \"Int64\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.FillImputer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Vector{Symbol}\", \"Function\", \"Function\", \"Function\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" =
":unknown" -":load_path" = "MLJMultivariateStatsInterface.KernelPCA" -":hyperparameters" = "`(:maxoutdim, :kernel, :solver, :inverse, :beta, :tol, :maxiter)`" +":load_path" = "MLJTransforms.FillImputer" +":hyperparameters" = "`(:features, :continuous_fill, :count_fill, :finite_fill)`" ":is_pure_julia" = "`true`" -":human_name" = "kernel prinicipal component analysis model" +":human_name" = "fill imputer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nKernelPCA\n```\n\nA model type for constructing a kernel prinicipal component analysis model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n```\n\nDo `model = KernelPCA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `KernelPCA(maxoutdim=...)`.\n\nIn kernel PCA the linear operations of ordinary principal component analysis are performed in a [reproducing Hilbert space](https://en.wikipedia.org/wiki/Reproducing_kernel_Hilbert_space).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Controls the the dimension (number of columns) of the output, `outdim`. Specifically, `outdim = min(n, indim, maxoutdim)`, where `n` is the number of observations and `indim` the input dimension.\n * `kernel::Function=(x,y)->x'y`: The kernel function, takes in 2 vector arguments x and y, returns a scalar value. Defaults to the dot product of `x` and `y`.\n * `solver::Symbol=:eig`: solver to use for the eigenvalues, one of `:eig`(default, uses `LinearAlgebra.eigen`), `:eigs`(uses `Arpack.eigs`).\n * `inverse::Bool=true`: perform calculations needed for inverse transform\n * `beta::Real=1.0`: strength of the ridge regression that learns the inverse transform when inverse is true.\n * `tol::Real=0.0`: Convergence tolerance for eigenvalue solver.\n * `maxiter::Int=300`: maximum number of iterations for eigenvalue solver.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. 
In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and ouput respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim`: Dimension of transformed data.\n * `principalvars`: The variance of the principal components.\n\n# Examples\n\n```\nusing MLJ\nusing LinearAlgebra\n\nKernelPCA = @load KernelPCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nfunction rbf_kernel(length_scale)\n return (x,y) -> norm(x-y)^2 / ((2 * length_scale)^2)\nend\n\nmodel = KernelPCA(maxoutdim=2, kernel=rbf_kernel(1))\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`PCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "KernelPCA" +":docstring" = """```\nFillImputer\n```\n\nA model type for constructing a fill imputer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nFillImputer = @load FillImputer pkg=unknown\n```\n\nDo `model = FillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `FillImputer(features=...)`.\n\nUse this model to impute `missing` values in tabular data. A fixed \"filler\" value is learned from the training data, one for each column of the table.\n\nFor imputing missing values in a vector, use [`UnivariateFillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose features each have element scitypes `Union{Missing, T}`, where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`. 
Check scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features`: a vector of names of features (symbols) for which imputation is to be attempted; default is empty, which is interpreted as \"impute all\".\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, Xnew)`: return `Xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features_seen_in_fit`: the names of features (columns) encountered during training\n * `univariate_transformer`: the univariate model applied to determine the fillers (its fields contain the functions defining the filler computations)\n * `filler_given_feature`: dictionary of filler values, keyed on feature (column) names\n\n# Examples\n\n```\nusing MLJ\nimputer = FillImputer()\n\nX = (a = [1.0, 2.0, missing, 3.0, missing],\n b = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass),\n c = [1, 1, 2, missing, 3])\n\njulia> schema(X)\n┌───────┬───────────────────────────────┐\n│ names │ scitypes │\n├───────┼───────────────────────────────┤\n│ a │ Union{Missing, Continuous} │\n│ b │ Union{Missing, Multiclass{2}} │\n│ c │ Union{Missing, Count} │\n└───────┴───────────────────────────────┘\n\nmach = machine(imputer, X)\nfit!(mach)\n\njulia> fitted_params(mach).filler_given_feature\nDict{Symbol, Any} with 3 entries:\n :a => 2.0\n :b => \"y\"\n :c => 2\n\njulia> transform(mach, X)\n(a = [1.0, 2.0, 2.0, 3.0, 2.0],\n b = CategoricalValue{String, UInt32}[\"y\", \"n\", \"y\", \"y\", \"y\"],\n c = [1, 1, 2, 2, 3],)\n```\n\nSee also [`UnivariateFillImputer`](@ref).\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "FillImputer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":FillImputer"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.MultitargetRidgeRegressor] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Union{Real, AbstractVecOrMat}\", \"Bool\")`"
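The three default fill rules in the `FillImputer` docstring above (median for `Continuous`, rounded median for `Count`, mode for finite scitypes) can be sketched per column as below. This is a hedged illustration: `filler` and `impute` are hypothetical names, and the real model dispatches on scitypes, not raw element types as here:

```julia
# Hedged sketch of FillImputer's default fill rules, keyed on raw eltype for
# brevity (the actual model reasons about scitypes). Illustrative only.
using Statistics  # median
using StatsBase   # mode

filler(col::AbstractVector{<:Union{Missing,AbstractFloat}}) = median(skipmissing(col))
filler(col::AbstractVector{<:Union{Missing,Integer}}) = round(Int, median(skipmissing(col)))
filler(col::AbstractVector) = mode(collect(skipmissing(col)))  # categorical fallback

impute(col) = coalesce.(col, filler(col))

impute([1.0, 2.0, missing, 3.0, missing])  # -> [1.0, 2.0, 2.0, 3.0, 2.0]
```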
-":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing)`" + +[MLJTransforms.MissingnessEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Bool\", \"Dict{T} where T<:Type\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" -":package_license" = "MIT" -":prediction_type" = ":deterministic" -":load_path" = "MLJMultivariateStatsInterface.MultitargetRidgeRegressor" -":hyperparameters" = "`(:lambda, :bias)`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.MissingnessEncoder" +":hyperparameters" = "`(:features, :ignore, :ordered_factor, :label_for_missing)`" ":is_pure_julia" = "`true`" -":human_name" = "multitarget ridge regressor" -":is_supervised" = "`true`" +":human_name" = "missingness encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nMultitargetRidgeRegressor\n```\n\nA model type for constructing a multitarget ridge regressor, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n```\n\nDo `model = MultitargetRidgeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetRidgeRegressor(lambda=...)`.\n\nMulti-target ridge regression adds a quadratic penalty term to multi-target least squares regression, for regularization. Ridge regression is particularly useful in the case of multicollinearity. In this case, the output represents a response vector. Options exist to specify a bias term, and to adjust the strength of the penalty term.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any table of responses whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `lambda=1.0`: Is the non-negative parameter for the regularization strength. 
If lambda is 0, ridge regression is equivalent to linear least squares regression, and as lambda approaches infinity, all the linear coefficients approach 0.\n * `bias=true`: Include the bias term if true, otherwise fit without bias term.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which should have the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `coefficients`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Examples\n\n```\nusing MLJ\nusing DataFrames\n\nRidgeRegressor = @load MultitargetRidgeRegressor pkg=MultivariateStats\n\nX, y = make_regression(100, 6; n_targets = 2) # a table and a table (synthetic data)\n\nridge_regressor = RidgeRegressor(lambda=1.5)\nmach = machine(ridge_regressor, X, y) |> fit!\n\nXnew, _ = make_regression(3, 6)\nyhat = predict(mach, Xnew) # new predictions\n```\n\nSee also [`LinearRegressor`](@ref), [`MultitargetLinearRegressor`](@ref), [`RidgeRegressor`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "MultitargetRidgeRegressor" -":target_in_fit" = "`true`" +":docstring" = """```\nMissingnessEncoder\n```\n\nA model type for constructing a missingness encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMissingnessEncoder = @load MissingnessEncoder pkg=MLJTransforms\n```\n\nDo `model = MissingnessEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MissingnessEncoder(features=...)`.\n\n`MissingnessEncoder` maps any missing level of a categorical feature into a new level (e.g., \"Missing\"). In this way, missingness will be treated as a new level by any subsequent model. This assumes that the categorical features have raw types that are in `Char`, `AbstractString`, and `Number`.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `features=[]`: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * `ignore=true`: Whether to exclude or include the features given in `features`\n * `ordered_factor=false`: Whether to encode `OrderedFactor` features or to ignore them\n * `label_for_missing::Dict{<:Type, <:Any} = Dict(AbstractString => \"missing\", Char => 'm')`: A dictionary where the possible keys are the types `Char`, `AbstractString`, and `Number`, and where each value signifies the new level to map `missing` values into, given the column's raw super type.
By default, if the raw type of the column subtypes `AbstractString` then missing values will be replaced with `\"missing\"`; if the raw type subtypes `Char` then the new value is `'m'`; and if the raw type subtypes `Number` then the new value is the lowest value in the column - 1.\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply missingness encoding to selected `Multiclass` or `OrderedFactor` features of `Xnew` specified by hyper-parameters, and return the new table. Features that are neither `Multiclass` nor `OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `label_for_missing_given_feature`: A dictionary that, for each column, maps `missing` into some value according to `label_for_missing`\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `encoded_features`: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nimport StatsBase.proportionmap\nusing MLJ\n\n# Define a table with missing values\nXm = (\n A = categorical([\"Ben\", \"John\", missing, missing, \"Mary\", \"John\", missing]),\n B = [1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = categorical([7, 5, missing, missing, 10, 0, missing]),\n D = [23, 23, 44, 66, 14, 23, 11],\n E = categorical([missing, 'g', 'r', missing, 'r', 'g', 'p'])\n)\n\nencoder = MissingnessEncoder()\nmach = fit!(machine(encoder, Xm))\nXnew = transform(mach, Xm)\n\njulia> Xnew\n(A = [\"Ben\", \"John\", \"missing\", \"missing\", \"Mary\", \"John\", \"missing\"],\n B = Union{Missing, Float64}[1.85, 1.67, missing, missing, 1.5, 1.67, missing],\n C = [7, 5, -1, -1, 10, 0, -1],\n D = [23, 23, 44, 66, 14, 23, 11],\n E = ['m', 'g', 'r', 'm', 'r', 'g', 'p'],)\n\n```\n\nSee also [`CardinalityReducer`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "MissingnessEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":target_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.SubspaceLDA] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Bool\", \"Int64\", \"Distances.SemiMetric\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" + +[MLJTransforms.ContrastEncoder] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Any\", \"Bool\", \"Union{Symbol, AbstractVector{Symbol}}\", \"Any\", \"Bool\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing)`"
":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table}`" +":output_scitype" = "`ScientificTypesBase.Table`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJMultivariateStatsInterface.SubspaceLDA" -":hyperparameters" = "`(:normalize, :outdim, :dist)`" +":abstract_type" = "`MLJModelInterface.Unsupervised`" +":package_license" = "unknown" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.ContrastEncoder" +":hyperparameters" = "`(:features, :ignore, :mode, :buildmatrix, :ordered_factor)`" ":is_pure_julia" = "`true`" -":human_name" = "subpace LDA model" -":is_supervised" = "`true`" +":human_name" = "contrast encoder" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nSubspaceLDA\n```\n\nA model type for constructing a subpace LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n```\n\nDo `model = SubspaceLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SubspaceLDA(normalize=...)`.\n\nMulticlass subspace linear discriminant analysis (LDA) is a variation on ordinary [`LDA`](@ref) suitable for high dimensional data, as it avoids storing scatter matrices. For details, refer the [MultivariateStats.jl documentation](https://juliastats.org/MultivariateStats.jl/stable/).\n\nIn addition to dimension reduction (using `transform`) probabilistic classification is provided (using `predict`). In the case of classification, the class probability for a new observation reflects the proximity of that observation to training observations associated with that class, and how far away the observation is from observations associated with other classes. Specifically, the distances, in the transformed (projected) space, of a new observation, from the centroid of each target class, is computed; the resulting vector of distances, multiplied by minus one, is passed to a softmax function to obtain a class probability prediction. Here \"distance\" is computed using a user-specified distance function.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `normalize=true`: Option to normalize the between class variance for the number of observations in each class, one of `true` or `false`.\n * `outdim`: the ouput dimension, automatically set to `min(indim, nclasses-1)` if equal to `0`. 
If a non-zero `outdim` is passed, then the actual output dimension used is `min(rank, outdim)` where `rank` is the rank of the within-class covariance matrix.\n * `dist=Distances.SqEuclidean()`: The distance metric to use when performing classification (to compare the distance between a new point and centroids in the transformed space); must be a subtype of `Distances.SemiMetric` from Distances.jl, e.g., `Distances.CosineDist`.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool)\n\n`class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `explained_variance_ratio`: The ratio of explained variance to total variance. Each dimension corresponds to an eigenvalue.\n\n# Examples\n\n```\nusing MLJ\n\nSubspaceLDA = @load SubspaceLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = SubspaceLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`BayesianLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "SubspaceLDA" -":target_in_fit" = "`true`" +":docstring" = """```\nContrastEncoder\n```\n\nA model type for constructing a contrast encoder, based on [MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nContrastEncoder = @load ContrastEncoder pkg=MLJTransforms\n```\n\nDo `model = ContrastEncoder()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ContrastEncoder(features=...)`.\n\n`ContrastEncoder` implements the following contrast encoding methods for categorical features: dummy, sum, backward/forward difference, and Helmert coding. 
More generally, users can specify a custom contrast or hypothesis matrix, and each feature can be encoded using a different method.\n\n# Training data\n\nIn MLJ (or MLJBase) bind an instance unsupervised `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`). Features to be transformed must have element scitype `Multiclass` or `OrderedFactor`. Use `schema(X)` to check scitypes.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * features=[]: A list of names of categorical features given as symbols to exclude or include from encoding, according to the value of `ignore`, or a single symbol (which is treated as a vector with one symbol), or a callable that returns true for features to be included/excluded\n * `mode=:dummy`: The type of encoding to use. Can be one of `:contrast`, `:dummy`, `:sum`, `:backward_diff`, `:forward_diff`, `:helmert` or `:hypothesis`.\n\nIf `ignore=false` (features to be encoded are listed explictly in `features`), then this can be a vector of the same length as `features` to specify a different contrast encoding scheme for each feature\n\n * `buildmatrix=nothing`: A function or other callable with signature `buildmatrix(colname, k)`,\n\nwhere `colname` is the name of the feature levels and `k` is it's length, and which returns contrast or hypothesis matrix with row/column ordering consistent with the ordering of `levels(col)`. Only relevant if `mode` is `:contrast` or `:hypothesis`.\n\n * ignore=true: Whether to exclude or include the features given in `features`\n * ordered_factor=false: Whether to encode `OrderedFactor` or ignore them\n\n# Operations\n\n * `transform(mach, Xnew)`: Apply contrast encoding to selected `Multiclass` or `OrderedFactor features of`Xnew`specified by hyper-parameters, and return the new table. 
Features that are neither`Multiclass`nor`OrderedFactor` are always left unchanged.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `vector_given_value_given_feature`: A dictionary that maps each level for each column in a subset of the categorical features of X into its frequency.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * encoded_features: The subset of the categorical features of `X` that were encoded\n\n# Examples\n\n```julia\nusing MLJ\n\n# Define categorical dataset\nX = (\n name = categorical([\"Ben\", \"John\", \"Mary\", \"John\"]),\n height = [1.85, 1.67, 1.5, 1.67],\n favnum = categorical([7, 5, 10, 1]),\n age = [23, 23, 14, 23],\n)\n\n# Check scitype coercions:\nschema(X)\n\nencoder = ContrastEncoder(\n features = [:name, :favnum],\n ignore = false, \n mode = [:dummy, :helmert],\n)\nmach = fit!(machine(encoder, X))\nXnew = transform(mach, X)\n\njulia > Xnew\n (name_John = [1.0, 0.0, 0.0, 0.0],\n name_Mary = [0.0, 1.0, 0.0, 1.0],\n height = [1.85, 1.67, 1.5, 1.67],\n favnum_5 = [0.0, 1.0, 0.0, -1.0],\n favnum_7 = [2.0, -1.0, 0.0, -1.0],\n favnum_10 = [-1.0, -1.0, 3.0, -1.0],\n age = [23, 23, 14, 23],)\n```\n\nSee also [`OneHotEncoder`](@ref)\n""" +":inverse_transform_scitype" = "`ScientificTypesBase.Table`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "ContrastEncoder" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.BayesianLDA] +":input_scitype" = "`ScientificTypesBase.Table`" +":transform_scitype" = "`ScientificTypesBase.Table`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Symbol\", \"StatsBase.CovarianceEstimator\", \"StatsBase.CovarianceEstimator\", \"Int64\", \"Float64\", \"Union{Nothing, Dict{<:Any, <:Real}, CategoricalDistributions.UnivariateFinite{<:Any, <:Any, <:Any, <:Real}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" + +[MLJTransforms.UnivariateStandardizer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`()`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`()`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{AbstractVector{<:ScientificTypesBase.Infinite}}`" +":output_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" 
+":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" -":prediction_type" = ":probabilistic" -":load_path" = "MLJMultivariateStatsInterface.BayesianLDA" -":hyperparameters" = "`(:method, :cov_w, :cov_b, :outdim, :regcoef, :priors)`" +":prediction_type" = ":unknown" +":load_path" = "MLJTransforms.UnivariateStandardizer" +":hyperparameters" = "`()`" ":is_pure_julia" = "`true`" -":human_name" = "Bayesian LDA model" -":is_supervised" = "`true`" +":human_name" = "single variable discretizer" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nBayesianLDA\n```\n\nA model type for constructing a Bayesian LDA model, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n```\n\nDo `model = BayesianLDA()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `BayesianLDA(method=...)`.\n\nThe Bayesian multiclass LDA algorithm learns a projection matrix as described in ordinary [`LDA`](@ref). Predicted class posterior probability distributions are derived by applying Bayes' rule with a multivariate Gaussian class-conditional distribution. A prior class distribution can be specified by the user or inferred from training data class frequency.\n\nSee also the [package documentation](https://multivariatestatsjl.readthedocs.io/en/latest/lda.html). For more information about the algorithm, see [Li, Zhu and Ogihara (2006): Using Discriminant Analysis for Multi-class Classification: An Experimental Investigation](https://doi.org/10.1007/s10115-006-0013-y).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `OrderedFactor` or `Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `method::Symbol=:gevd`: choice of solver, one of `:gevd` or `:whiten` methods.\n * `cov_w::StatsBase.SimpleCovariance()`: An estimator for the within-class covariance (used in computing the within-class scatter matrix, `Sw`). Any robust estimator from `CovarianceEstimation.jl` can be used.\n * `cov_b::StatsBase.SimpleCovariance()`: The same as `cov_w` but for the between-class covariance (used in computing the between-class scatter matrix, `Sb`).\n * `outdim::Int=0`: The output dimension, i.e., dimension of the transformed space, automatically set to `min(indim, nclasses-1)` if equal to 0.\n * `regcoef::Float64=1e-6`: The regularization coefficient. A positive value `regcoef*eigmax(Sw)` where `Sw` is the within-class scatter matrix, is added to the diagonal of `Sw` to improve numerical stability. This can be useful if using the standard covariance estimator.\n * `priors::Union{Nothing, UnivariateFinite{<:Any, <:Any, <:Any, <:Real}, Dict{<:Any, <:Real}} = nothing`: For use in prediction with Bayes rule. If `priors = nothing` then `priors` are estimated from the class proportions in the training data. 
Otherwise it requires a `Dict` or `UnivariateFinite` object specifying the classes with non-zero probabilities in the training target.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `classes`: The classes seen during model fitting.\n * `projection_matrix`: The learned projection matrix, of size `(indim, outdim)`, where `indim` and `outdim` are the input and output dimensions respectively (See Report section below).\n * `priors`: The class priors for classification. As inferred from training target `y`, if not user-specified. A `UnivariateFinite` object with levels consistent with `levels(y)`.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: The dimension of the input space i.e the number of training features.\n * `outdim`: The dimension of the transformed space the model is projected to.\n * `mean`: The mean of the untransformed training data. A vector of length `indim`.\n * `nclasses`: The number of classes directly observed in the training data (which can be less than the total number of classes in the class pool).\n * `class_means`: The class-specific means of the training data. A matrix of size `(indim, nclasses)` with the ith column being the class-mean of the ith class in `classes` (See fitted params section above).\n * `class_weights`: The weights (class counts) of each class. A vector of length `nclasses` with the ith element being the class weight of the ith class in `classes`. (See fitted params section above.)\n * `Sb`: The between class scatter matrix.\n * `Sw`: The within class scatter matrix.\n\n# Examples\n\n```\nusing MLJ\n\nBayesianLDA = @load BayesianLDA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = BayesianLDA()\nmach = machine(model, X, y) |> fit!\n\nXproj = transform(mach, X)\ny_hat = predict(mach, X)\nlabels = predict_mode(mach, X)\n```\n\nSee also [`LDA`](@ref), [`SubspaceLDA`](@ref), [`BayesianSubspaceLDA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "BayesianLDA" -":target_in_fit" = "`true`" +":docstring" = """```\nUnivariateStandardizer()\n```\n\nTransformer type for standardizing (whitening) single variable data.\n\nThis model may be deprecated in the future. 
Consider using [`Standardizer`](@ref), which handles both tabular *and* univariate data.\n""" +":inverse_transform_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateStandardizer" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":inverse_transform", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJMultivariateStatsInterface.PCA] +":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Infinite}`" +":transform_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" ":constructor" = "`nothing`" -":hyperparameter_types" = "`(\"Int64\", \"Symbol\", \"Float64\", \"Union{Nothing, Real, Vector{Float64}}\")`" -":package_uuid" = "6f286f6a-111f-5878-ab1e-185364afe411" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" + +[MLJTransforms.UnivariateFillImputer] +":is_wrapper" = "`false`" +":hyperparameter_types" = "`(\"Function\", \"Function\", \"Function\")`" +":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" +":hyperparameter_ranges" = "`(nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}`" -":output_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" +":fit_data_scitype" = "`Tuple{Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}}`" +":output_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":tags" = [] ":abstract_type" = "`MLJModelInterface.Unsupervised`" ":package_license" = "MIT" ":prediction_type" = ":unknown" -":load_path" = "MLJMultivariateStatsInterface.PCA" -":hyperparameters" = "`(:maxoutdim, :method, :variance_ratio, :mean)`" +":load_path" = "MLJTransforms.UnivariateFillImputer" +":hyperparameters" = "`(:continuous_fill, :count_fill, :finite_fill)`" ":is_pure_julia" = "`true`" -":human_name" = "pca" +":human_name" = "single variable fill imputer" ":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nPCA\n```\n\nA model type for constructing a pca, based on [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nPCA = @load PCA pkg=MultivariateStats\n```\n\nDo `model = PCA()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `PCA(maxoutdim=...)`.\n\nPrincipal component analysis learns a linear projection onto a lower dimensional space while preserving most of the initial variance seen in the training data.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `maxoutdim=0`: Together with `variance_ratio`, controls the output dimension `outdim` chosen by the model. Specifically, suppose that `k` is the smallest integer such that retaining the `k` most significant principal components accounts for `variance_ratio` of the total variance in the training data. Then `outdim = min(outdim, maxoutdim)`. If `maxoutdim=0` (default) then the effective `maxoutdim` is `min(n, indim - 1)` where `n` is the number of observations and `indim` the number of features in the training data.\n * `variance_ratio::Float64=0.99`: The ratio of variance preserved after the transformation\n * `method=:auto`: The method to use to solve the problem. Choices are\n\n * `:svd`: Support Vector Decomposition of the matrix.\n * `:cov`: Covariance matrix decomposition.\n * `:auto`: Use `:cov` if the matrices first dimension is smaller than its second dimension and otherwise use `:svd`\n * `mean=nothing`: if `nothing`, centering will be computed and applied, if set to `0` no centering (data is assumed pre-centered); if a vector is passed, the centering is done with that vector.\n\n# Operations\n\n * `transform(mach, Xnew)`: Return a lower dimensional projection of the input `Xnew`, which should have the same scitype as `X` above.\n * `inverse_transform(mach, Xsmall)`: For a dimension-reduced table `Xsmall`, such as returned by `transform`, reconstruct a table, having same the number of columns as the original training data `X`, that transforms to `Xsmall`. Mathematically, `inverse_transform` is a right-inverse for the PCA projection map, whose image is orthogonal to the kernel of that map. In particular, if `Xsmall = transform(mach, Xnew)`, then `inverse_transform(Xsmall)` is only an approximation to `Xnew`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `projection`: Returns the projection matrix, which has size `(indim, outdim)`, where `indim` and `outdim` are the number of features of the input and output respectively.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `indim`: Dimension (number of columns) of the training data and new data to be transformed.\n * `outdim = min(n, indim, maxoutdim)` is the output dimension; here `n` is the number of observations.\n * `tprincipalvar`: Total variance of the principal components.\n * `tresidualvar`: Total residual variance.\n * `tvar`: Total observation variance (principal + residual variance).\n * `mean`: The mean of the untransformed training data, of length `indim`.\n * `principalvars`: The variance of the principal components. An AbstractVector of length `outdim`\n * `loadings`: The models loadings, weights for each variable used when calculating principal components. 
A matrix of size (`indim`, `outdim`) where `indim` and `outdim` are as defined above.\n\n# Examples\n\n```\nusing MLJ\n\nPCA = @load PCA pkg=MultivariateStats\n\nX, y = @load_iris # a table and a vector\n\nmodel = PCA(maxoutdim=2)\nmach = machine(model, X) |> fit!\n\nXproj = transform(mach, X)\n```\n\nSee also [`KernelPCA`](@ref), [`ICA`](@ref), [`FactorAnalysis`](@ref), [`PPCA`](@ref)\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":package_url" = "https://github.com/JuliaStats/MultivariateStats.jl" -":package_name" = "MultivariateStats" -":name" = "PCA" +":docstring" = """```\nUnivariateFillImputer\n```\n\nA model type for constructing a single variable fill imputer, based on [unknown.jl](unknown), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nUnivariateFillImputer = @load UnivariateFillImputer pkg=unknown\n```\n\nDo `model = UnivariateFillImputer()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `UnivariateFillImputer(continuous_fill=...)`.\n\nUse this model to imputing `missing` values in a vector with a fixed value learned from the non-missing values of training vector.\n\nFor imputing missing values in tabular data, use [`FillImputer`](@ref) instead.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, x)\n```\n\nwhere\n\n * `x`: any abstract vector with element scitype `Union{Missing, T}` where `T` is a subtype of `Continuous`, `Multiclass`, `OrderedFactor` or `Count`; check scitype using `scitype(x)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `continuous_fill`: function or other callable to determine value to be imputed in the case of `Continuous` (abstract float) data; default is to apply `median` after skipping `missing` values\n * `count_fill`: function or other callable to determine value to be imputed in the case of `Count` (integer) data; default is to apply rounded `median` after skipping `missing` values\n * `finite_fill`: function or other callable to determine value to be imputed in the case of `Multiclass` or `OrderedFactor` data (categorical vectors); default is to apply `mode` after skipping `missing` values\n\n# Operations\n\n * `transform(mach, xnew)`: return `xnew` with missing values imputed with the fill values learned when fitting `mach`\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `filler`: the fill value to be imputed in all new data\n\n# Examples\n\n```\nusing MLJ\nimputer = UnivariateFillImputer()\n\nx_continuous = [1.0, 2.0, missing, 3.0]\nx_multiclass = coerce([\"y\", \"n\", \"y\", missing, \"y\"], Multiclass)\nx_count = [1, 1, 1, 2, missing, 3, 3]\n\nmach = machine(imputer, x_continuous)\nfit!(mach)\n\njulia> fitted_params(mach)\n(filler = 2.0,)\n\njulia> transform(mach, [missing, missing, 101.0])\n3-element Vector{Float64}:\n 2.0\n 2.0\n 101.0\n\nmach2 = machine(imputer, x_multiclass) |> fit!\n\njulia> transform(mach2, x_multiclass)\n5-element CategoricalArray{String,1,UInt32}:\n \"y\"\n \"n\"\n \"y\"\n \"y\"\n \"y\"\n\nmach3 = machine(imputer, x_count) |> fit!\n\njulia> transform(mach3, [missing, missing, 5])\n3-element Vector{Int64}:\n 2\n 2\n 5\n```\n\nFor imputing tabular data, use [`FillImputer`](@ref).\n""" +":inverse_transform_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, 
AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" +":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":package_name" = "MLJTransforms" +":name" = "UnivariateFillImputer" ":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":inverse_transform", ":transform"] +":implemented_methods" = [":fit", ":fitted_params", ":transform", ":UnivariateFillImputer"] ":deep_properties" = "`()`" ":predict_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" -":is_wrapper" = "`false`" +":input_scitype" = "`Union{AbstractVector{<:Union{Missing, ScientificTypesBase.Continuous}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Count}}, AbstractVector{<:Union{Missing, ScientificTypesBase.Finite}}}`" +":transform_scitype" = "`Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":constructor" = "`nothing`" -[MLJLIBSVMInterface.ProbabilisticNuSVC] -":is_wrapper" = "`false`" +[MLJLIBSVMInterface.OneClassSVM] +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Unknown}}`" +":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" +":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" ":package_license" = "unknown" -":prediction_type" = ":probabilistic" -":load_path" = "MLJLIBSVMInterface.ProbabilisticNuSVC" +":prediction_type" = ":unknown" +":load_path" = "MLJLIBSVMInterface.OneClassSVM" ":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" ":is_pure_julia" = "`false`" -":human_name" = "probabilistic ν-support vector classifier" -":is_supervised" = "`true`" +":human_name" = "one-class support vector machine" +":is_supervised" = "`false`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nProbabilisticNuSVC\n```\n\nA model type for constructing a probabilistic ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticNuSVC()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticNuSVC(kernel=...)`.\n\nThis model is identical to [`NuSVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. 
Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM # model type\nmodel = ProbabilisticNuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00313, versicolor=>0.0247, virginica=>0.972)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000598, versicolor=>0.0155, virginica=>0.984)\n UnivariateFinite{Multiclass{3}}(setosa=>2.27e-6, versicolor=>2.73e-6, virginica=>1.0)\n\njulia> yhat = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticNuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`NuSVC`](@ref), [`SVC`](@ref), [`ProbabilisticSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation. [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":docstring" = """```\nOneClassSVM\n```\n\nA model type for constructing a one-class support vector machine, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneClassSVM = @load OneClassSVM pkg=LIBSVM\n```\n\nDo `model = OneClassSVM()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneClassSVM(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an outlier detection model delivering raw scores based on the decision function of a support vector machine. 
Like the [`NuSVC`](@ref) classifier, it uses the `nu` re-parameterization of the `cost` parameter appearing in standard support vector classification [`SVC`](@ref).\n\nTo extract normalized scores (\"probabilities\") wrap the model using `ProbabilisticDetector` from [OutlierDetection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For threshold-based classification, wrap the probabilistic model using MLJ's `BinaryThresholdPredictor`. Examples of wrapping appear below.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `transform(mach, Xnew)`: return scores for outlierness, given features `Xnew` having the same scitype as `X` above. The greater the score, the more likely it is an outlier. This score is based on the SVM decision function. For normalized scores, wrap `model` using `ProbabilisticDetector` from OutlierDetection.jl and call `predict` instead, and for threshold-based classification, wrap again using `BinaryThresholdPredictor`. See the examples below.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `orientation`: this equals `1` if the decision function for `libsvm_model` is increasing with increasing outlierness, and `-1` if it is decreasing instead. Correspondingly, the `libsvm_model` attaches `true` to outliers in the first case, and `false` in the second. 
(The `scores` given in the MLJ report and generated by `MLJ.transform` already correct for this ambiguity, which is therefore only an issue for users directly accessing `libsvm_model`.)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Generating raw scores for outlierness\n\n```\nusing MLJ\nimport LIBSVM\nimport StableRNGs.StableRNG\n\nOneClassSVM = @load OneClassSVM pkg=LIBSVM # model type\nmodel = OneClassSVM(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nrng = StableRNG(123)\nXmatrix = randn(rng, 5, 3)\nXmatrix[1, 1] = 100.0\nX = MLJ.table(Xmatrix)\n\nmach = machine(model, X) |> fit!\n\n# training scores (outliers have larger scores):\njulia> report(mach).scores\n5-element Vector{Float64}:\n 6.711689156091755e-7\n -6.740101976655081e-7\n -6.711632439648446e-7\n -6.743015858874887e-7\n -6.745393717880104e-7\n\n# scores for new data:\nXnew = MLJ.table(rand(rng, 2, 3))\n\njulia> transform(mach, rand(rng, 2, 3))\n2-element Vector{Float64}:\n -6.746293022511047e-7\n -6.744289265348623e-7\n```\n\n## Generating probabilistic predictions of outlierness\n\nContinuing the previous example:\n\n```\nusing OutlierDetection\npmodel = ProbabilisticDetector(model)\npmach = machine(pmodel, X) |> fit!\n\n# probabilistic predictions on new data:\n\njulia> y_prob = predict(pmach, Xnew)\n2-element UnivariateFiniteVector{OrderedFactor{2}, String, UInt8, Float64}:\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>9.57e-5)\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>0.0)\n\n# probabilities for outlierness:\n\njulia> pdf.(y_prob, \"outlier\")\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n\n# raw scores are still available using `transform`:\n\njulia> transform(pmach, Xnew)\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n```\n\n## Outlier classification using a probability threshold:\n\nContinuing the previous example:\n\n```\ndmodel = BinaryThresholdPredictor(pmodel, threshold=0.9)\ndmach = machine(dmodel, X) |> fit!\n\njulia> yhat = predict(dmach, Xnew)\n2-element CategoricalArrays.CategoricalArray{String,1,UInt8}:\n \"normal\"\n \"normal\"\n```\n\n## User-defined kernels\n\nContinuing the first example:\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = OneClassSVM(kernel=k)\nmach = machine(model, X) |> fit!\n\njulia> yhat = transform(mach, Xnew)\n2-element Vector{Float64}:\n -0.4825363352732942\n -0.4848772169720227\n```\n\nSee also [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README). 
For an alternative source of outlier detection models with an MLJ interface, see [OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/mpastell/LIBSVM.jl" ":package_name" = "LIBSVM" -":name" = "ProbabilisticNuSVC" -":target_in_fit" = "`true`" +":name" = "OneClassSVM" +":target_in_fit" = "`false`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`ScientificTypesBase.Unknown`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJLIBSVMInterface.EpsilonSVR] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -9213,10 +8880,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJLIBSVMInterface.LinearSVC] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"LIBSVM.Linearsolver.LINEARSOLVER\", \"Float64\", \"Float64\", \"Float64\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" @@ -9250,10 +8917,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJLIBSVMInterface.ProbabilisticSVC] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -9287,10 +8954,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJLIBSVMInterface.NuSVR] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -9324,10 +8991,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJLIBSVMInterface.NuSVC] -":is_wrapper" = "`false`" +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -9361,84 +9028,84 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJLIBSVMInterface.SVC] ":is_wrapper" = "`false`" + +[MLJLIBSVMInterface.ProbabilisticNuSVC] +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.Deterministic`" +":abstract_type" = "`MLJModelInterface.Probabilistic`" ":package_license" = "unknown" -":prediction_type" = ":deterministic" -":load_path" = "MLJLIBSVMInterface.SVC" -":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":prediction_type" = ":probabilistic" +":load_path" = "MLJLIBSVMInterface.ProbabilisticNuSVC" +":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" ":is_pure_julia" = "`false`" -":human_name" = "C-support vector classifier" +":human_name" = "probabilistic ν-support vector classifier" ":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nSVC\n```\n\nA model type for constructing a C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSVC = @load SVC pkg=LIBSVM\n```\n\nDo `model = SVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SVC(kernel=...)`.\n\nThis model predicts actual class labels. To predict probabilities, use instead [`ProbabilisticSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. 
Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nSVC = @load SVC pkg=LIBSVM # model type\nmodel = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = SVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element 
CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the classifiers [`ProbabilisticSVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref). And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" +":docstring" = """```\nProbabilisticNuSVC\n```\n\nA model type for constructing a probabilistic ν-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM\n```\n\nDo `model = ProbabilisticNuSVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `ProbabilisticNuSVC(kernel=...)`.\n\nThis model is identical to [`NuSVC`](@ref) with the exception that it predicts probabilities, instead of actual class labels. Probabilities are computed using Platt scaling, which will add to total computation time.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n[Platt, John (1999): \"Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods.\"](https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393)\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue 91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`).
If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nProbabilisticNuSVC = @load ProbabilisticNuSVC pkg=LIBSVM # model type\nmodel = ProbabilisticNuSVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> probs = predict(mach, Xnew)\n3-element UnivariateFiniteVector{Multiclass{3}, String, UInt32, Float64}:\n UnivariateFinite{Multiclass{3}}(setosa=>0.00313, versicolor=>0.0247, virginica=>0.972)\n UnivariateFinite{Multiclass{3}}(setosa=>0.000598, versicolor=>0.0155, virginica=>0.984)\n UnivariateFinite{Multiclass{3}}(setosa=>2.27e-6, versicolor=>2.73e-6, virginica=>1.0)\n\njulia> yhat = mode.(probs)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = ProbabilisticNuSVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\nprobs = predict(mach, Xnew)\n```\n\nSee also the classifiers [`NuSVC`](@ref), [`SVC`](@ref), [`ProbabilisticSVC`](@ref) and [`LinearSVC`](@ref). And see [LIBSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation
[documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/mpastell/LIBSVM.jl" ":package_name" = "LIBSVM" -":name" = "SVC" +":name" = "ProbabilisticNuSVC" ":target_in_fit" = "`true`" -":supports_class_weights" = "`true`" +":supports_class_weights" = "`false`" ":supports_online" = "`false`" ":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{<:ScientificTypesBase.Finite}}`" ":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" - -[MLJLIBSVMInterface.OneClassSVM] ":is_wrapper" = "`false`" + +[MLJLIBSVMInterface.SVC] +":constructor" = "`nothing`" ":hyperparameter_types" = "`(\"Any\", \"Float64\", \"Float64\", \"Float64\", \"Int32\", \"Float64\", \"Float64\", \"Bool\")`" ":package_uuid" = "b1bec4e5-fd48-53fe-b0cb-9723c09d164b" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":reporting_operations" = "`()`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, ScientificTypesBase.Unknown}}`" -":output_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}}, Tuple{ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}, AbstractVector{<:ScientificTypesBase.Finite}, Any}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" ":tags" = [] -":abstract_type" = "`MLJModelInterface.UnsupervisedDetector`" +":abstract_type" = "`MLJModelInterface.Deterministic`" ":package_license" = "unknown" -":prediction_type" = ":unknown" -":load_path" = "MLJLIBSVMInterface.OneClassSVM" -":hyperparameters" = "`(:kernel, :gamma, :nu, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" +":prediction_type" = ":deterministic" +":load_path" = "MLJLIBSVMInterface.SVC" +":hyperparameters" = "`(:kernel, :gamma, :cost, :cachesize, :degree, :coef0, :tolerance, :shrinking)`" ":is_pure_julia" = "`false`" -":human_name" = "one-class support vector machine" -":is_supervised" = "`false`" +":human_name" = "C-support vector classifier" +":is_supervised" = "`true`" ":iteration_parameter" = "`nothing`" -":docstring" = """```\nOneClassSVM\n```\n\nA model type for constructing a one-class support vector machine, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nOneClassSVM = @load OneClassSVM pkg=LIBSVM\n```\n\nDo `model = OneClassSVM()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `OneClassSVM(kernel=...)`.\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. 
Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\nThis model is an outlier detection model delivering raw scores based on the decision function of a support vector machine. Like the [`NuSVC`](@ref) classifier, it uses the `nu` re-parameterization of the `cost` parameter appearing in standard support vector classification [`SVC`](@ref).\n\nTo extract normalized scores (\"probabilities\") wrap the model using `ProbabilisticDetector` from [OutlierDetection.jl](https://github.com/OutlierDetectionJL/OutlierDetection.jl). For threshold-based classification, wrap the probabilistic model using MLJ's `BinaryThresholdPredictor`. Examples of wrapping appear below.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with:\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) - > tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIVSVM.jl issue91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used. Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `nu=0.5` (range (0, 1]): An upper bound on the fraction of margin errors and a lower bound of the fraction of support vectors. Denoted `ν` in the cited paper. Changing `nu` changes the thickness of the margin (a neighborhood of the decision surface) and a margin error is said to have occurred if a training observation lies on the wrong side of the surface or within the margin.\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `transform(mach, Xnew)`: return scores for outlierness, given features `Xnew` having the same scitype as `X` above. The greater the score, the more likely it is an outlier. This score is based on the SVM decision function. For normalized scores, wrap `model` using `ProbabilisticDetector` from OutlierDetection.jl and call `predict` instead, and for threshold-based classification, wrap again using `BinaryThresholdPredictor`. 
See the examples below.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `orientation`: this equals `1` if the decision function for `libsvm_model` is increasing with increasing outlierness, and `-1` if it is decreasing instead. Correspondingly, the `libsvm_model` attaches `true` to outliers in the first case, and `false` in the second. (The `scores` given in the MLJ report and generated by `MLJ.transform` already correct for this ambiguity, which is therefore only an issue for users directly accessing `libsvm_model`.)\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Generating raw scores for outlierness\n\n```\nusing MLJ\nimport LIBSVM\nimport StableRNGs.StableRNG\n\nOneClassSVM = @load OneClassSVM pkg=LIBSVM # model type\nmodel = OneClassSVM(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nrng = StableRNG(123)\nXmatrix = randn(rng, 5, 3)\nXmatrix[1, 1] = 100.0\nX = MLJ.table(Xmatrix)\n\nmach = machine(model, X) |> fit!\n\n# training scores (outliers have larger scores):\njulia> report(mach).scores\n5-element Vector{Float64}:\n 6.711689156091755e-7\n -6.740101976655081e-7\n -6.711632439648446e-7\n -6.743015858874887e-7\n -6.745393717880104e-7\n\n# scores for new data:\nXnew = MLJ.table(rand(rng, 2, 3))\n\njulia> transform(mach, rand(rng, 2, 3))\n2-element Vector{Float64}:\n -6.746293022511047e-7\n -6.744289265348623e-7\n```\n\n## Generating probabilistic predictions of outlierness\n\nContinuing the previous example:\n\n```\nusing OutlierDetection\npmodel = ProbabilisticDetector(model)\npmach = machine(pmodel, X) |> fit!\n\n# probabilistic predictions on new data:\n\njulia> y_prob = predict(pmach, Xnew)\n2-element UnivariateFiniteVector{OrderedFactor{2}, String, UInt8, Float64}:\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>9.57e-5)\n UnivariateFinite{OrderedFactor{2}}(normal=>1.0, outlier=>0.0)\n\n# probabilities for outlierness:\n\njulia> pdf.(y_prob, \"outlier\")\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n\n# raw scores are still available using `transform`:\n\njulia> transform(pmach, Xnew)\n2-element Vector{Float64}:\n 9.572583265925801e-5\n 0.0\n```\n\n## Outlier classification using a probability threshold:\n\nContinuing the previous example:\n\n```\ndmodel = BinaryThresholdPredictor(pmodel, threshold=0.9)\ndmach = machine(dmodel, X) |> fit!\n\njulia> yhat = predict(dmach, Xnew)\n2-element CategoricalArrays.CategoricalArray{String,1,UInt8}:\n \"normal\"\n \"normal\"\n```\n\n## User-defined kernels\n\nContinuing the first example:\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = OneClassSVM(kernel=k)\nmach = machine(model, X) |> fit!\n\njulia> yhat = transform(mach, Xnew)\n2-element Vector{Float64}:\n -0.4825363352732942\n -0.4848772169720227\n```\n\nSee also [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README). 
For an alternative source of outlier detection models with an MLJ interface, see [OutlierDetection.jl](https://outlierdetectionjl.github.io/OutlierDetection.jl/dev/).\n""" +":docstring" = """```\nSVC\n```\n\nA model type for constructing a C-support vector classifier, based on [LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSVC = @load SVC pkg=LIBSVM\n```\n\nDo `model = SVC()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SVC(kernel=...)`.\n\nThis model predicts actual class labels. To predict probabilities, use instead [`ProbabilisticSVC`](@ref).\n\nReference for algorithm and core C-library: C.-C. Chang and C.-J. Lin (2011): \"LIBSVM: a library for support vector machines.\" *ACM Transactions on Intelligent Systems and Technology*, 2(3):27:1–27:27. Updated at [https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf](https://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf). \n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have `Continuous` element scitype; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n * `w`: a dictionary of class weights, keyed on `levels(y)`.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `kernel=LIBSVM.Kernel.RadialBasis`: either an object that can be called, as in `kernel(x1, x2)`, or one of the built-in kernels from the LIBSVM.jl package listed below. Here `x1` and `x2` are vectors whose lengths match the number of columns of the training data `X` (see \"Examples\" below).\n\n * `LIBSVM.Kernel.Linear`: `(x1, x2) -> x1'*x2`\n * `LIBSVM.Kernel.Polynomial`: `(x1, x2) -> (gamma*x1'*x2 + coef0)^degree`\n * `LIBSVM.Kernel.RadialBasis`: `(x1, x2) -> (exp(-gamma*norm(x1 - x2)^2))`\n * `LIBSVM.Kernel.Sigmoid`: `(x1, x2) -> tanh(gamma*x1'*x2 + coef0)`\n\n Here `gamma`, `coef0`, `degree` are other hyper-parameters. Serialization of models with user-defined kernels comes with some restrictions. See [LIBSVM.jl issue 91](https://github.com/JuliaML/LIBSVM.jl/issues/91)\n * `gamma = 0.0`: kernel parameter (see above); if `gamma==-1.0` then `gamma = 1/nfeatures` is used in training, where `nfeatures` is the number of features (columns of `X`). If `gamma==0.0` then `gamma = 1/(var(Tables.matrix(X))*nfeatures)` is used.
Actual value used appears in the report (see below).\n * `coef0 = 0.0`: kernel parameter (see above)\n * `degree::Int32 = Int32(3)`: degree in polynomial kernel (see above)\n\n * `cost=1.0` (range (0, `Inf`)): the parameter denoted $C$ in the cited reference; for greater regularization, decrease `cost`\n * `cachesize=200.0` cache memory size in MB\n * `tolerance=0.001`: tolerance for the stopping criterion\n * `shrinking=true`: whether to use shrinking heuristics\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `libsvm_model`: the trained model object created by the LIBSVM.jl package\n * `encoding`: class encoding used internally by `libsvm_model` - a dictionary of class labels keyed on the internal integer representation\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `gamma`: actual value of the kernel parameter `gamma` used in training\n\n# Examples\n\n## Using a built-in kernel\n\n```\nusing MLJ\nimport LIBSVM\n\nSVC = @load SVC pkg=LIBSVM # model type\nmodel = SVC(kernel=LIBSVM.Kernel.Polynomial) # instance\n\nX, y = @load_iris # table, vector\nmach = machine(model, X, y) |> fit!\n\nXnew = (sepal_length = [6.4, 7.2, 7.4],\n sepal_width = [2.8, 3.0, 2.8],\n petal_length = [5.6, 5.8, 6.1],\n petal_width = [2.1, 1.6, 1.9],)\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## User-defined kernels\n\n```\nk(x1, x2) = x1'*x2 # equivalent to `LIBSVM.Kernel.Linear`\nmodel = SVC(kernel=k)\nmach = machine(model, X, y) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"virginica\"\n \"virginica\"\n \"virginica\"\n```\n\n## Incorporating class weights\n\nIn either scenario above, we can do:\n\n```julia\nweights = Dict(\"virginica\" => 1, \"versicolor\" => 20, \"setosa\" => 1)\nmach = machine(model, X, y, weights) |> fit!\n\njulia> yhat = predict(mach, Xnew)\n3-element CategoricalArrays.CategoricalArray{String,1,UInt32}:\n \"versicolor\"\n \"versicolor\"\n \"versicolor\"\n```\n\nSee also the classifiers [`ProbabilisticSVC`](@ref), [`NuSVC`](@ref) and [`LinearSVC`](@ref). 
And see [LIVSVM.jl](https://github.com/JuliaML/LIBSVM.jl) and the original C implementation [documentation](https://github.com/cjlin1/libsvm/blob/master/README).\n""" ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":package_url" = "https://github.com/mpastell/LIBSVM.jl" ":package_name" = "LIBSVM" -":name" = "OneClassSVM" -":target_in_fit" = "`false`" -":supports_class_weights" = "`false`" +":name" = "SVC" +":target_in_fit" = "`true`" +":supports_class_weights" = "`true`" ":supports_online" = "`false`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":transform"] +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] ":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" ":supports_training_losses" = "`false`" ":supports_weights" = "`false`" ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`nothing`" +":is_wrapper" = "`false`" [MLJFlux.EntityEmbedder] -":constructor" = "`nothing`" +":is_wrapper" = "`true`" ":hyperparameter_types" = "`(\"Union{MLJFlux.MLJFluxDeterministic, MLJFlux.MLJFluxProbabilistic}\",)`" ":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" ":hyperparameter_ranges" = "`(nothing,)`" @@ -9472,10 +9139,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`ScientificTypesBase.Unknown`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`true`" +":constructor" = "`nothing`" [MLJFlux.MultitargetNeuralNetworkRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" ":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -9509,10 +9176,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJFlux.NeuralNetworkClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" ":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -9546,10 +9213,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" 
-":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJFlux.ImageClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\")`" ":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -9583,10 +9250,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`AbstractVector{<:ScientificTypesBase.Image}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJFlux.NeuralNetworkBinaryClassifier] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" ":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -9620,10 +9287,10 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" +":constructor" = "`nothing`" [MLJFlux.NeuralNetworkRegressor] -":constructor" = "`nothing`" +":is_wrapper" = "`false`" ":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Union{Integer, Random.AbstractRNG}\", \"Bool\", \"ComputationalResources.AbstractResource\", \"Dict{Symbol, Real}\")`" ":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" ":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" @@ -9657,41 +9324,4 @@ ":reports_feature_importances" = "`false`" ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":transform_scitype" = "`ScientificTypesBase.Unknown`" -":is_wrapper" = "`false`" - -[MLJEnsembles.EnsembleModel] -":is_wrapper" = "`true`" -":hyperparameter_types" = "`(\"MLJModelInterface.Probabilistic\", \"Vector{Float64}\", \"Float64\", \"Union{Int64, Random.AbstractRNG}\", \"Int64\", \"ComputationalResources.AbstractResource\", \"Any\")`" -":package_uuid" = "50ed68f4-41fd-4504-931a-ed422449fee0" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":reporting_operations" = "`()`" -":fit_data_scitype" = "`Tuple{ScientificTypesBase.Unknown, ScientificTypesBase.Unknown}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":tags" = [] -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":package_license" = "unknown" -":prediction_type" = ":probabilistic" -":load_path" = "MLJEnsembles.EnsembleModel" -":hyperparameters" = "`(:model, :atomic_weights, :bagging_fraction, :rng, :n, :acceleration, :out_of_bag_measure)`" -":is_pure_julia" = "`false`" -":human_name" = "probabilistic ensemble model" -":is_supervised" = 
"`true`" -":iteration_parameter" = "`nothing`" -":docstring" = """```\nEnsembleModel(model,\n atomic_weights=Float64[],\n bagging_fraction=0.8,\n n=100,\n rng=GLOBAL_RNG,\n acceleration=CPU1(),\n out_of_bag_measure=[])\n```\n\nCreate a model for training an ensemble of `n` clones of `model`, with optional bagging. Ensembling is useful if `fit!(machine(atom, data...))` does not create identical models on repeated calls (ie, is a stochastic model, such as a decision tree with randomized node selection criteria), or if `bagging_fraction` is set to a value less than 1.0, or both.\n\nHere the atomic `model` must support targets with scitype `AbstractVector{<:Finite}` (single-target classifiers) or `AbstractVector{<:Continuous}` (single-target regressors).\n\nIf `rng` is an integer, then `MersenneTwister(rng)` is the random number generator used for bagging. Otherwise some `AbstractRNG` object is expected.\n\nThe atomic predictions are optionally weighted according to the vector `atomic_weights` (to allow for external optimization) except in the case that `model` is a `Deterministic` classifier, in which case `atomic_weights` are ignored.\n\nThe ensemble model is `Deterministic` or `Probabilistic`, according to the corresponding supertype of `atom`. In the case of deterministic classifiers (`target_scitype(atom) <: Abstract{<:Finite}`), the predictions are majority votes, and for regressors (`target_scitype(atom)<: AbstractVector{<:Continuous}`) they are ordinary averages. Probabilistic predictions are obtained by averaging the atomic probability distribution/mass functions; in particular, for regressors, the ensemble prediction on each input pattern has the type `MixtureModel{VF,VS,D}` from the Distributions.jl package, where `D` is the type of predicted distribution for `atom`.\n\nSpecify `acceleration=CPUProcesses()` for distributed computing, or `CPUThreads()` for multithreading.\n\nIf a single measure or non-empty vector of measures is specified by `out_of_bag_measure`, then out-of-bag estimates of performance are written to the training report (call `report` on the trained machine wrapping the ensemble model).\n\n*Important:* If per-observation or class weights `w` (not to be confused with atomic weights) are specified when constructing a machine for the ensemble model, as in `mach = machine(ensemble_model, X, y, w)`, then `w` is used by any measures specified in `out_of_bag_measure` that support them.\n""" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":package_url" = "https://github.com/JuliaAI/MLJEnsembles.jl" -":package_name" = "MLJEnsembles" -":name" = "EnsembleModel" -":target_in_fit" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":implemented_methods" = [] -":deep_properties" = "`()`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`ScientificTypesBase.Unknown`" -":supports_training_losses" = "`false`" -":supports_weights" = "`false`" -":reports_feature_importances" = "`false`" -":input_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":constructor" = "`EnsembleModel`" +":constructor" = "`nothing`" diff --git a/test/builtins/ThresholdPredictors.jl b/test/builtins/ThresholdPredictors.jl index 51f2aaf..30b0cc7 100644 --- a/test/builtins/ThresholdPredictors.jl +++ b/test/builtins/ThresholdPredictors.jl @@ -1,5 +1,5 @@ module TestThresholdPredictors -using Test, MLJModels, CategoricalArrays +using Test, MLJModels, MLJTransforms, CategoricalArrays using 
ScientificTypes using CategoricalDistributions From a0bc628247e5a17359581f9da40ba48ae6e6cefe Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 22 Aug 2025 19:37:12 +1200 Subject: [PATCH 19/22] bump 0.18.0 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 53f394c..eb62c0c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJModels" uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7" authors = ["Anthony D. Blaom "] -version = "0.17.9" +version = "0.18.0" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" From 4b60ef46da411557917d3d9890674791cf066882 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 22 Aug 2025 20:22:24 +1200 Subject: [PATCH 20/22] update readme --- README.md | 89 +++++++++++++++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index a8f4ebe..2fb1892 100644 --- a/README.md +++ b/README.md @@ -3,77 +3,74 @@ [![Build Status](https://github.com/JuliaAI/MLJModels.jl/workflows/CI/badge.svg)](https://github.com/JuliaAI/MLJModels.jl/actions) [![codecov](https://codecov.io/gh/JuliaAI/MLJModels.jl/graph/badge.svg?token=KgarnnCc0K)](https://codecov.io/gh/JuliaAI/MLJModels.jl) -Repository of the "built-in" models available for use in the -[MLJ](https://github.com/JuliaAI/MLJ.jl) MLJ machine -learning framework; and the home of the MLJ model registry. +Home of the [MLJ](https://juliaml.ai) Model Registry and tools for model search and model code loading. -For instructions on integrating a new model with MLJ visit -[here](https://JuliaAI.github.io/MLJ.jl/dev/adding_models_for_general_use/) +For instructions on integrating a new model into MLJ visit +[here](https://juliaai.github.io/MLJModelInterface.jl/stable/). ### Contents - [Who is this repo for?](#who-is-this-repo-for) + - [How to register new models](#how-to-register-new-models) - [What is provided here?](#what-is-provided-here) - - [Instructions for updating the MLJ model registry](#instructions-for-updating-the-mlj-model-registry) ## Who is this repo for? -General users of the MLJ machine learning platform should refer to -[MLJ home page](https://JuliaAI.github.io/MLJ.jl/dev/) -for usage and installation instructions. MLJModels is a dependency of -MLJ that the general user can ignore. +Newcomers to MLJ should refer to [this page](https://juliaml.ai) for usage and +installation instructions. MLJModels.jl is a dependency of MLJ that the general user can +ignore. -This repository is for developers wishing to -[register](#instructions-for-updating-the-mlj-model-registry) new MLJ -model interfaces, whether they be: +This repository is for developers maintaining: -- implemented **natively** in a - package providing the core machine learning algorithm, as in +- The [MLJ Model Registry](/src/registry), a database of packages implementing the MLJ + interface for machine learning models, together with metadata about those models. + +- MLJ tools for searching the database (`models(...)` and `matching(...)`) and for loading + model code (`@load`, `@iload`). + +## How to register new models + +The model registry lives at "/src/registry" but +is maintained using +[MLJModelRegistryTools.jl](https://juliaai.github.io/MLJModelRegistryTools.jl/dev/). 
+ +New MLJ model interfaces can be implemented either: + +- **natively** in a package providing the core machine learning algorithm, as in [`EvoTrees.jl`](https://github.com/Evovest/EvoTrees.jl/blob/master/src/MLJ.jl); or -implemented in a separate **interface package**, such as +- in a separate **interface package**, such as [MLJDecisionTreeInterface.jl](https://github.com/JuliaAI/MLJDecisionTreeInterface.jl). -It also a place for developers to add models (mostly transformers) -such as `OneHotEncoder`, that are exported for "built-in" use in -MLJ. (In the future these models may live in a separate package.) - -To list *all* model interfaces currently registered, do `using MLJ` or -`using MLJModels` and run: +In either case, the package providing the implementation needs to be added to the MLJ +Model Registry to make it discoverable by MLJ users, and to make the model metadata +searchable. To register a package, prepare a pull request to MLJModels.jl by following [these instructions](https://juliaai.github.io/MLJModelRegistryTools.jl/dev/registry_management_tools/#Registry-management-tools). -- `localmodels()` to list built-in models (updated when external models are loaded with `@load`) +Currently, after registering the model, one must also make a PR to MLJ updating [this +dictionary of model +descriptors](https://github.com/JuliaAI/MLJ.jl/blob/dev/docs/ModelDescriptors.toml) to +ensure the new models appear in the right places in MLJ's [Model +Browser](https://JuliaAI.github.io/MLJ.jl/dev/model_browser/#Model-Browser). -- `models()` to list all registered models, or see [this list](/src/registry/Models.toml). +To list *all* model interfaces currently registered, do `using MLJ` or `using MLJModels` +and run `models()`. Recall that an interface is loaded from within MLJ, together with the package providing the underlying algorithm, using the syntax `@load RidgeRegressor pkg=GLM`, where the `pkg` keyword is only necessary in ambiguous cases. - ## What is provided here? -MLJModels contains: - -- transformers to be pre-loaded into MLJ, located at - [/src/builtins](/src/builtins), such as `OneHotEncoder` - and `ConstantClassifier`. - -- the MLJ [model registry](src/registry/Metadata.toml), listing all - models that can be called from MLJ using `@load`. Package developers - can register new models by implementing the MLJ interface in their - package and following [these - instructions](https://JuliaAI.github.io/MLJ.jl/dev/adding_models_for_general_use/). - - -## Instructions for updating the MLJ model registry +The actual MLJ Model Registry consists of the TOML files in [this +directory](/src/registry). A few models available for immediate use in MLJ (without +loading model code using `@load`) are also provided by this package, under "/src/builtins", +but these may be moved out in the future. -Generally model registration is performed by administrators. If you -have an interface you would like registered, open an issue -[here](https://github.com/JuliaAI/MLJ.jl/issues). +### Historical note -**Administrator instructions.** These are given in the -`MLJModels.@update` document string.
After registering the model, make a PR to MLJ -updating [this dictionary of model descriptors](https://github.com/JuliaAI/MLJ.jl/blob/dev/docs/ModelDescriptors.toml) -to ensure the new models appear in the right places in MLJ's [Model Browser](https://JuliaAI.github.io/MLJ.jl/dev/model_browser/#Model-Browser) +Older versions of MLJModels.jl contained some of the models now residing at +[MLJTransforms.jl](https://github.com/JuliaAI/MLJTransforms.jl/tree/dev). Even older +versions provided all of the non-native implementations of the +MLJ interface. From d3d068bb8e2f610a0c5f041cdb58aef54b49f6d3 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 22 Aug 2025 22:29:41 +1200 Subject: [PATCH 21/22] rm accidentally added MLJModelRegistryTools from ./Project.toml --- Project.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Project.toml b/Project.toml index eb62c0c..7294d79 100644 --- a/Project.toml +++ b/Project.toml @@ -13,7 +13,6 @@ Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" -MLJModelRegistryTools = "0a96183e-380b-4aa6-be10-c555140810f2" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" @@ -39,7 +38,6 @@ Distributions = "0.25" InteractiveUtils = "1" LinearAlgebra = "1" MLJModelInterface = "1.10" -MLJModelRegistryTools = "0.1.1" MLJTransforms = "0.1.1" Markdown = "1" OrderedCollections = "1.1" From 93ad936a7d052f2813db3e8ea54905d773a75d63 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Fri, 22 Aug 2025 23:18:46 +1200 Subject: [PATCH 22/22] fix bug in CI --- .github/workflows/ci.yml | 2 +- test/runtests.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3cd76a9..55d4ce2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,7 +43,7 @@ jobs: - uses: julia-actions/julia-runtest@v1 env: # This environment variable enables the integration tests: - MLJ_TEST_INTEGRATION: '1' + MLJ_TEST_REGISTRY: '1' - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v4 with: diff --git a/test/runtests.jl b/test/runtests.jl index b4ebfd9..da95571 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -30,5 +30,5 @@ if parse(Bool, get(ENV, "MLJ_TEST_REGISTRY", "false")) else @info "Test of the MLJ Registry is being skipped. Set environment variable "* "MLJ_TEST_REGISTRY = \"true\" to include them.\n"* - "The Registry test takes at least one hour. " + "The Registry test takes about ten minutes. "
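For contributors running the suite locally, the opt-in pattern patched into test/runtests.jl above works as in the following minimal sketch; the `include`d file name is illustrative only, as the actual test file layout is not shown in this patch:

```julia
# Sketch of the opt-in gating in test/runtests.jl: the slow registry tests
# run only when the environment variable MLJ_TEST_REGISTRY is set to "true".
using Test

if parse(Bool, get(ENV, "MLJ_TEST_REGISTRY", "false"))
    @testset "registry" begin
        include("registry.jl")  # hypothetical file holding the registry tests
    end
else
    @info "Test of the MLJ Registry is being skipped. Set environment variable "*
        "MLJ_TEST_REGISTRY = \"true\" to include them.\n"*
        "The Registry test takes about ten minutes. "
end
```

Because environment variables are inherited by the test subprocess, something like `MLJ_TEST_REGISTRY=true julia --project -e 'using Pkg; Pkg.test()'` opts in locally, which is what the CI change above achieves through the workflow's `env:` block.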
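To make the registry-search workflow referenced in the updated README concrete, here is a minimal sketch, assuming MLJ and the interface package MLJDecisionTreeInterface.jl are installed in the active environment:

```julia
using MLJ

X, y = @load_iris   # a table of features and a categorical target vector

# Query the registry; no model code is loaded at this point:
candidates = models(matching(X, y))  # metadata for all models compatible with (X, y)

# Load the interface code for a chosen model. The `pkg=...` keyword is only
# needed when more than one package registers a model with the same name:
Tree = @load DecisionTreeClassifier pkg=DecisionTree

mach = machine(Tree(), X, y)  # bind an instance with default hyper-parameters to data
fit!(mach)
yhat = predict(mach, X)       # probabilistic predictions
```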