From 8598a93cfa3f355f7ce2b2d83c8bcc67216498ce Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Sun, 2 Mar 2025 21:36:03 +1300 Subject: [PATCH 1/2] in registry fix EntityEmbedder; update Evo models with :device --- src/registry/Metadata.toml | 276 ++++++++++++++++++------------------- src/registry/Models.toml | 5 +- 2 files changed, 140 insertions(+), 141 deletions(-) diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index e8d4551..7bfe31b 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -719,114 +719,6 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[GLM.LinearBinaryClassifier] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`ScientificTypesBase.Unknown`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "GLM" -":package_license" = "MIT" -":load_path" = "MLJGLMInterface.LinearBinaryClassifier" -":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -":package_url" = "https://github.com/JuliaStats/GLM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nLinearBinaryClassifier\n```\n\nA model type for constructing a linear binary classifier, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\n```\n\nDo `model = LinearBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearBinaryClassifier(fit_intercept=...)`.\n\n`LinearBinaryClassifier` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a binary target variable, with a user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor(2)` or `<:Multiclass(2)`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. 
If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `link=GLM.LogitLink`: The function which links the linear prediction function to the probability of a particular outcome or class. This must have type `GLM.Link01`. Options include `GLM.LogitLink()`, `GLM.ProbitLink()`, `CloglogLink(),`CauchitLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features used during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. 
Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport GLM # namespace must be available\n\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\nclf = LinearBinaryClassifier(fit_intercept=false, link=GLM.ProbitLink())\n\nX, y = @load_crabs\n\nmach = machine(clf, X, y) |> fit!\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\nyhat = predict(mach, Xnew) # probabilistic predictions\npdf(yhat, levels(y)) # probability matrix\np_B = pdf.(yhat, \"B\")\nclass_labels = predict_mode(mach, Xnew)\n\nfitted_params(mach).features\nfitted_params(mach).coef\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearCountRegressor`](@ref)\n""" -":name" = "LinearBinaryClassifier" -":human_name" = "linear binary classifier" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] -":hyperparameters" = "`(:fit_intercept, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" -":hyperparameter_types" = "`(\"Bool\", \"GLM.Link01\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[GLM.LinearCountRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Count}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Count}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "GLM" -":package_license" = "MIT" -":load_path" = "MLJGLMInterface.LinearCountRegressor" -":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" -":package_url" = "https://github.com/JuliaStats/GLM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nLinearCountRegressor\n```\n\nA model type for constructing a linear count regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearCountRegressor = @load LinearCountRegressor pkg=GLM\n```\n\nDo `model = LinearCountRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `LinearCountRegressor(fit_intercept=...)`.\n\n`LinearCountRegressor` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a `Count` target variable (non-negative, unbounded integer) with user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Count`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `distribution=Distributions.Poisson()`: The distribution which the residuals/errors of the model should fit.\n * `link=GLM.LogLink()`: The function which links the linear prediction function to the probability of a particular outcome or class. This should be one of the following: `GLM.IdentityLink()`, `GLM.InverseLink()`, `GLM.InverseSquareLink()`, `GLM.LogLink()`, `GLM.SqrtLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same Scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. 
For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport MLJ.Distributions.Poisson\n\n# Generate some data whose target y looks Poisson when conditioned on\n# X:\nN = 10_000\nw = [1.0, -2.0, 3.0]\nmu(x) = exp(w'x) # mean for a log link function\nXmat = rand(N, 3)\nX = MLJ.table(Xmat)\ny = map(1:N) do i\n x = Xmat[i, :]\n rand(Poisson(mu(x)))\nend;\n\nCountRegressor = @load LinearCountRegressor pkg=GLM\nmodel = CountRegressor(fit_intercept=false)\nmach = machine(model, X, y)\nfit!(mach)\n\nXnew = MLJ.table(rand(3, 3))\nyhat = predict(mach, Xnew)\nyhat_point = predict_mean(mach, Xnew)\n\n# get coefficients approximating `w`:\njulia> fitted_params(mach).coef\n3-element Vector{Float64}:\n 0.9969008753103842\n -2.0255901752504775\n 3.014407534033522\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" -":name" = "LinearCountRegressor" -":human_name" = "linear count regressor" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] -":hyperparameters" = "`(:fit_intercept, :distribution, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" -":hyperparameter_types" = "`(\"Bool\", \"Distributions.Distribution\", \"GLM.Link\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - -[GLM.LinearRegressor] -":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" -":output_scitype" = "`ScientificTypesBase.Unknown`" -":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" -":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" -":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" -":transform_scitype" = "`ScientificTypesBase.Unknown`" -":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" -":target_in_fit" = "`true`" -":is_pure_julia" = "`true`" -":package_name" = "GLM" -":package_license" = "MIT" -":load_path" = "MLJGLMInterface.LinearRegressor" -":package_uuid" = 
"38e38edf-8417-5370-95a0-9cbb8c7f171a" -":package_url" = "https://github.com/JuliaStats/GLM.jl" -":is_wrapper" = "`false`" -":supports_weights" = "`true`" -":supports_class_weights" = "`false`" -":supports_online" = "`false`" -":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=GLM\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(fit_intercept=...)`.\n\n`LinearRegressor` assumes the target is a continuous variable whose conditional distribution is normal with constant variance, and whose expected value is a linear combination of the features (identity link function). Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n * `w`: is a vector of `Real` per-observation weights\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `dropcollinear=false`: Whether to drop features in the training data to ensure linear independence. If true , only the first of each set of linearly-dependent features is used. The coefficient for redundant linearly dependent features is `0.0` and all associated statistics are set to `NaN`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same Scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nWhen all keys are enabled in `report_keys`, the following fields are available in `report(mach)`:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. 
For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nLinearRegressor = @load LinearRegressor pkg=GLM\nglm = LinearRegressor()\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(glm, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\nyhat_point = predict_mean(mach, Xnew) # new predictions\n\nfitted_params(mach).features\nfitted_params(mach).coef # x1, x2, intercept\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearCountRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" -":name" = "LinearRegressor" -":human_name" = "linear regressor" -":is_supervised" = "`true`" -":prediction_type" = ":probabilistic" -":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] -":hyperparameters" = "`(:fit_intercept, :dropcollinear, :offsetcol, :report_keys)`" -":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Symbol}\", \"Union{Nothing, AbstractVector{Symbol}}\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" -":iteration_parameter" = "`nothing`" -":supports_training_losses" = "`false`" -":reports_feature_importances" = "`false`" -":deep_properties" = "`()`" -":reporting_operations" = "`()`" -":constructor" = "`nothing`" - [CatBoost.CatBoostRegressor] ":input_scitype" = "`Union{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}, AbstractVector{<:ScientificTypesBase.OrderedFactor}, AbstractVector{<:ScientificTypesBase.Multiclass}}}, AbstractMatrix{ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -6642,16 +6534,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """EvoTreeClassifier(;kwargs...)\n\nA model type for constructing a EvoTreeClassifier, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeClassifier is used to perform multi-class classification, using cross-entropy loss.\n\n# Hyper-parameters\n\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. 
A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `tree_type=\"binary\"` Tree structure to be used. One of:\n\n * `binary`: Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n * `oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeClassifier(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, K]` where `K` is the number of classes:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\n```\n\nDo `model = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeClassifier(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Multiclas` or `<:OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: returns the mode of each of the prediction above.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(1:3, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\nmodel = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_iris\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mode(mach, X)\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n""" +":docstring" = """EvoTreeClassifier(;kwargs...)\n\nA model type for constructing an EvoTreeClassifier, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeClassifier is used to perform multi-class classification, using cross-entropy loss.\n\n# Hyper-parameters\n\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting is stopped.\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree's raw predictions are scaled by `eta` prior to being added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `tree_type=:binary`: Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently.
Trees are built depthwise until max depth is reached or until min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed on all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `:gpu`.\n\n# Internal API\n\nDo `config = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeClassifier(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, K]` where `K` is the number of classes:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as functors, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\n```\n\nDo `model = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeClassifier(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Multiclass` or `<:OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above.
Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: returns the mode of each of the predictions above.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(1:3, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\nmodel = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_iris\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mode(mach, X)\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n""" ":name" = "EvoTreeClassifier" ":human_name" = "evo tree classifier" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":predict"] -":hyperparameters" = "`(:nrounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :tree_type, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] +":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6678,16 +6570,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """EvoTreeGaussian(;kwargs...)\n\nA model type for constructing a EvoTreeGaussian, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting μ and σ parameters to maximize likelihood.\n\n# Hyper-parameters\n\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0.
Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=8.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for Gaussian regression, constraints may not be enforce systematically.\n * `tree_type=\"binary\"` Tree structure to be used. One of:\n\n * `binary`: Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n * `oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeGaussian(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 2]` where the second dimensions refer to `μ` and `σ` respectively:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\n```\n\nDo `model = EvoTreeGaussian()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeGaussian(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian distributions given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nparams = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(params; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\nmodel = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n""" +":docstring" = """EvoTreeGaussian(;kwargs...)\n\nA model type for constructing an EvoTreeGaussian, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting μ and σ parameters to maximize likelihood.\n\n# Hyper-parameters\n\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting is stopped.\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree's raw predictions are scaled by `eta` prior to being added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=8.0`: Minimum weight needed in a node to perform a split.
Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for Gaussian regression, constraints may not be enforced systematically.\n * `tree_type=:binary`: Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Trees are built depthwise until max depth is reached or until min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed on all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `:gpu`.\n\n# Internal API\n\nDo `config = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeGaussian(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 2]` where the second dimension refers to `μ` and `σ` respectively:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as functors, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\n```\n\nDo `model = EvoTreeGaussian()` to construct an instance with default hyper-parameters.
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeGaussian(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian distributions given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nparams = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(params; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\nmodel = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n""" ":name" = "EvoTreeGaussian" ":human_name" = "evo tree gaussian" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":predict"] -":hyperparameters" = "`(:nrounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"String\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] +":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6714,16 +6606,16 @@ ":supports_weights" = "`true`"
":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """EvoTreeMLE(;kwargs...)\n\nA model type for constructing a EvoTreeMLE, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeMLE performs maximum likelihood estimation. Assumed distribution is specified through `loss` kwargs. Both Gaussian and Logistic distributions are supported.\n\n# Hyper-parameters\n\n`loss=:gaussian`: Loss to be be minimized during training. One of:\n\n * `:gaussian` / `:gaussian_mle`\n * `:logistic` / `:logistic_mle`\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0.\n\nA lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance. \n\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=8.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for MLE regression, constraints may not be enforced systematically.\n * `tree_type=\"binary\"` Tree structure to be used. One of:\n\n * `binary`: Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n * `oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeMLE()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeMLE(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, nparams]` where the second dimensions refer to `μ` & `σ` for Normal/Gaussian and `μ` & `s` for Logistic.\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\n```\n\nDo `model = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian or Logistic distributions (according to provided `loss`) given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\nmodel = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n""" +":docstring" = """EvoTreeMLE(;kwargs...)\n\nA model type for constructing a EvoTreeMLE, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeMLE performs maximum likelihood estimation. Assumed distribution is specified through `loss` kwargs. Both Gaussian and Logistic distributions are supported.\n\n# Hyper-parameters\n\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting in stopped.\n\n`loss=:gaussian`: Loss to be be minimized during training. One of:\n\n * `:gaussian_mle`\n * `:logistic_mle`\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. 
Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0.\n\nA lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance. \n\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=8.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for MLE regression, constraints may not be enforced systematically.\n * `tree_type=:binary` Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `gpu`. Following losses are not GPU supported at the moment: `:logistic_mle`.\n\n# Internal API\n\nDo `config = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeMLE(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, nparams]` where the second dimensions refer to `μ` & `σ` for Normal/Gaussian and `μ` & `s` for Logistic.\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\n```\n\nDo `model = EvoTreeMLE()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian or Logistic distributions (according to provided `loss`) given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\nmodel = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n""" ":name" = "EvoTreeMLE" ":human_name" = "evo tree mle" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [] -":hyperparameters" = "`(:nrounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"String\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] +":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6750,16 +6642,16 @@ ":supports_weights" = "`true`" 
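The new `device` and `early_stopping_rounds` hyper-parameters recorded in the updated tuples above can be exercised through the internal API. A minimal sketch, assuming `fit_evotree` accepts evaluation data through its `kwargs...`; the `x_eval`/`y_eval` keyword names are illustrative and not confirmed by this patch:

```julia
using EvoTrees

# Gaussian MLE booster: stop once 20 consecutive rounds pass without
# improvement of the tracked metric on the evaluation data.
config = EvoTreeMLE(
    loss=:gaussian_mle,
    nrounds=500,
    early_stopping_rounds=20,  # new hyper-parameter in this patch
    device=:cpu,               # :gpu also possible (but not for :logistic_mle)
)
nobs, nfeats = 1_000, 5
x_train, y_train = randn(nobs, nfeats), randn(nobs)
x_eval, y_eval = randn(200, nfeats), randn(200)  # assumed keyword names
model = fit_evotree(config; x_train, y_train, x_eval, y_eval)
preds = EvoTrees.predict(model, x_train)  # nobs × 2 matrix: μ and σ
```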
":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """EvoTreeRegressor(;kwargs...)\n\nA model type for constructing a EvoTreeRegressor, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface.\n\n# Hyper-parameters\n\n * `loss=:mse`: Loss to be be minimized during training. One of:\n\n * `:mse`\n * `:logloss`\n * `:gamma`\n * `:tweedie`\n * `:quantile`\n * `:l1`\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `alpha::T=0.5`: Loss specific parameter in the [0, 1] range: - `:quantile`: target quantile for the regression. - `:l1`: weighting parameters to positive vs negative residuals. - Positive residual weights = `alpha` - Negative residual weights = `(1 - alpha)`\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). Only `:linear`, `:logistic`, `:gamma` and `tweedie` losses are supported at the moment.\n * `tree_type=\"binary\"` Tree structure to be used. One of:\n\n * `binary`: Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n * `oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeRegressor(loss=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Vector` of length `nobs`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\n```\n\nDo `model = EvoTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\nmodel = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\n```\n""" +":docstring" = """EvoTreeRegressor(;kwargs...)\n\nA model type for constructing an EvoTreeRegressor, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface.\n\n# Hyper-parameters\n\n * `loss=:mse`: Loss to be minimized during training. One of:\n\n * `:mse`\n * `:mae`\n * `:logloss`\n * `:gamma`\n * `:tweedie`\n * `:quantile`\n * `:cred_var`: **experimental** credibility-based gains, derived from the ratio of spread to process variance.\n * `:cred_std`: **experimental** credibility-based gains, derived from the ratio of spread to process standard deviation.\n * `metric`: The evaluation metric used to track performance on the evaluation data, and the basis for early stopping. Supported metrics are:\n\n * `:mse`: Mean-squared error. Adapted for general regression models.\n * `:rmse`: Root-mean-squared error. Adapted for general regression models.\n * `:mae`: Mean absolute error. Adapted for general regression models.\n * `:logloss`: Adapted for `:logistic` regression models.\n * `:poisson`: Poisson deviance. Adapted to `EvoTreeCount` count models.\n * `:gamma`: Gamma deviance. 
Adapted to regression problems on Gamma-like, positively distributed targets.\n * `:tweedie`: Tweedie deviance. Adapted to regression problems on Tweedie-like, positively distributed targets with probability mass at `y == 0`.\n * `:quantile`: Corresponds to an asymmetric absolute error, where residuals are penalized by `alpha` or `1 - alpha` according to their sign.\n * `:gini`: The normalized Gini coefficient between predictions and targets.\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting is stopped.\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree's raw predictions are scaled by `eta` prior to being added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `alpha::T=0.5`: Loss-specific parameter in the [0, 1] range: - `:quantile`: target quantile for the regression.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). Only `:linear`, `:logistic`, `:gamma` and `:tweedie` losses are supported at the moment.\n * `tree_type=:binary`: Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Trees are built depthwise until max depth is reached or until min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `:gpu`.\n\n# Internal API\n\nDo `config = EvoTreeRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeRegressor(loss=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Vector` of length `nobs`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\n```\n\nDo `model = EvoTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\nmodel = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\n```\n""" ":name" = "EvoTreeRegressor" ":human_name" = "evo tree regressor" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" -":implemented_methods" = [":predict"] -":hyperparameters" = "`(:nrounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"String\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] +":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", 
\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -6786,16 +6678,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = """EvoTreeCount(;kwargs...)\n\nA model type for constructing a EvoTreeCount, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeCount is used to perform Poisson probabilistic regression on count target.\n\n# Hyper-parameters\n\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).\n * `tree_type=\"binary\"` Tree structure to be used. One of:\n\n * `binary`: Each node of a tree is grown independently. Tree are built depthwise until max depth is reach or if min weight or gain (see `gamma`) stops further node splits.\n * `oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeCount()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeCount(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Vector` of length `nobs`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\n```\n\nDo `model = EvoTreeCount()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeCount(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X, y) where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Count`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: returns a vector of Poisson distributions given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(0:2, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\nusing MLJ\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\nmodel = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nX, y = randn(nobs, nfeats), rand(0:2, nobs)\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n""" +":docstring" = """EvoTreeCount(;kwargs...)\n\nA model type for constructing an EvoTreeCount, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeCount is used to perform Poisson probabilistic regression on a count target.\n\n# Hyper-parameters\n\n * `early_stopping_rounds::Integer`: number of consecutive rounds without metric improvement after which fitting is stopped.\n * `nrounds=100`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree's raw predictions are scaled by `eta` prior to being added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `L2::T=0.0`: L2 regularization factor on aggregate gain. 
Must be >= 0. Higher L2 can result in a more robust model.\n * `lambda::T=0.0`: L2 regularization factor on individual gain. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model.\n * `max_depth=6`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=64`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).\n * `tree_type=:binary`: Tree structure to be used. One of:\n\n * `:binary`: Each node of a tree is grown independently. Trees are built depthwise until max depth is reached or until min weight or gain (see `gamma`) stops further node splits.\n * `:oblivious`: A common splitting condition is imposed to all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n * `device=:cpu`: Hardware device to use for computations. Can be either `:cpu` or `:gpu`.\n\n# Internal API\n\nDo `config = EvoTreeCount()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeCount(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Vector` of length `nobs`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\n```\n\nDo `model = EvoTreeCount()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeCount(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X, y) where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Count`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: returns a vector of Poisson distributions given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(0:2, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\nusing MLJ\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\nmodel = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nX, y = randn(nobs, nfeats), rand(0:2, nobs)\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n""" ":name" = "EvoTreeCount" ":human_name" = "evo tree count" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" -":implemented_methods" = [":predict"] -":hyperparameters" = "`(:nrounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"String\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":implemented_methods" = [":show", ":fit", ":predict", ":update", ":feature_importances"] +":hyperparameters" = "`(:loss, :metric, :nrounds, :early_stopping_rounds, :L2, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng, :device)`" +":hyperparameter_types" = "`(\"Symbol\", \"Symbol\", \"Int64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Dict{Int64, Int64}\", \"Symbol\", \"Random.AbstractRNG\", \"Symbol\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = 
"`true`" @@ -8675,7 +8567,115 @@ ":reporting_operations" = "`()`" ":constructor" = "`nothing`" -[MLJTransforms.EntityEmbedder] +[GLM.LinearBinaryClassifier] +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Binary}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{<:ScientificTypesBase.Binary}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":predict_scitype" = "`ScientificTypesBase.Unknown`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "GLM" +":package_license" = "MIT" +":load_path" = "MLJGLMInterface.LinearBinaryClassifier" +":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +":package_url" = "https://github.com/JuliaStats/GLM.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nLinearBinaryClassifier\n```\n\nA model type for constructing a linear binary classifier, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\n```\n\nDo `model = LinearBinaryClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearBinaryClassifier(fit_intercept=...)`.\n\n`LinearBinaryClassifier` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a binary target variable, with a user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor(2)` or `<:Multiclass(2)`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `link=GLM.LogitLink`: The function which links the linear prediction function to the probability of a particular outcome or class. This must have type `GLM.Link01`. Options include `GLM.LogitLink()`, `GLM.ProbitLink()`, `CloglogLink(),`CauchitLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. 
An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features used during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. 
Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport GLM # namespace must be available\n\nLinearBinaryClassifier = @load LinearBinaryClassifier pkg=GLM\nclf = LinearBinaryClassifier(fit_intercept=false, link=GLM.ProbitLink())\n\nX, y = @load_crabs\n\nmach = machine(clf, X, y) |> fit!\n\nXnew = (;FL = [8.1, 24.8, 7.2],\n RW = [5.1, 25.7, 6.4],\n CL = [15.9, 46.7, 14.3],\n CW = [18.7, 59.7, 12.2],\n BD = [6.2, 23.6, 8.4],)\n\nyhat = predict(mach, Xnew) # probabilistic predictions\npdf(yhat, levels(y)) # probability matrix\np_B = pdf.(yhat, \"B\")\nclass_labels = predict_mode(mach, Xnew)\n\nfitted_params(mach).features\nfitted_params(mach).coef\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearCountRegressor`](@ref)\n""" +":name" = "LinearBinaryClassifier" +":human_name" = "linear binary classifier" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict"] +":hyperparameters" = "`(:fit_intercept, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" +":hyperparameter_types" = "`(\"Bool\", \"GLM.Link01\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[GLM.LinearCountRegressor] +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Count}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Count}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Count}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "GLM" +":package_license" = "MIT" +":load_path" = "MLJGLMInterface.LinearCountRegressor" +":package_uuid" = "38e38edf-8417-5370-95a0-9cbb8c7f171a" +":package_url" = "https://github.com/JuliaStats/GLM.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nLinearCountRegressor\n```\n\nA model type for constructing a linear count regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearCountRegressor = @load LinearCountRegressor pkg=GLM\n```\n\nDo `model = LinearCountRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `LinearCountRegressor(fit_intercept=...)`.\n\n`LinearCountRegressor` is a [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model#Variance_function), specialised to the case of a `Count` target variable (non-negative, unbounded integer) with user-specified link function. Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Count`; check the scitype with `schema(y)`\n * `w`: is a vector of `Real` per-observation weights\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `distribution=Distributions.Poisson()`: The distribution which the residuals/errors of the model should fit.\n * `link=GLM.LogLink()`: The function which links the linear prediction function to the probability of a particular outcome or class. This should be one of the following: `GLM.IdentityLink()`, `GLM.InverseLink()`, `GLM.InverseSquareLink()`, `GLM.LogLink()`, `GLM.SqrtLink()`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `maxiter::Integer=30`: The maximum number of iterations allowed to achieve convergence.\n * `atol::Real=1e-6`: Absolute threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `rtol::Real=1e-6`: Relative threshold for convergence. Convergence is achieved when the relative change in deviance is less than `max(rtol*dev, atol). This term exists to avoid failure when deviance is unchanged except for rounding errors.\n * `minstepfac::Real=0.001`: Minimum step fraction. Must be between 0 and 1. Lower bound for the factor used to update the linear fit.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same Scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. 
For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nimport MLJ.Distributions.Poisson\n\n# Generate some data whose target y looks Poisson when conditioned on\n# X:\nN = 10_000\nw = [1.0, -2.0, 3.0]\nmu(x) = exp(w'x) # mean for a log link function\nXmat = rand(N, 3)\nX = MLJ.table(Xmat)\ny = map(1:N) do i\n x = Xmat[i, :]\n rand(Poisson(mu(x)))\nend;\n\nCountRegressor = @load LinearCountRegressor pkg=GLM\nmodel = CountRegressor(fit_intercept=false)\nmach = machine(model, X, y)\nfit!(mach)\n\nXnew = MLJ.table(rand(3, 3))\nyhat = predict(mach, Xnew)\nyhat_point = predict_mean(mach, Xnew)\n\n# get coefficients approximating `w`:\njulia> fitted_params(mach).coef\n3-element Vector{Float64}:\n 0.9969008753103842\n -2.0255901752504775\n 3.014407534033522\n\nreport(mach)\n```\n\nSee also [`LinearRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" +":name" = "LinearCountRegressor" +":human_name" = "linear count regressor" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] +":hyperparameters" = "`(:fit_intercept, :distribution, :link, :offsetcol, :maxiter, :atol, :rtol, :minstepfac, :report_keys)`" +":hyperparameter_types" = "`(\"Bool\", \"Distributions.Distribution\", \"GLM.Link\", \"Union{Nothing, Symbol}\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Union{Nothing, AbstractVector{Symbol}}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[GLM.LinearRegressor] +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Union{Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}}, Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Finite}}}, AbstractVector{ScientificTypesBase.Continuous}, AbstractVector{<:Union{ScientificTypesBase.Continuous, ScientificTypesBase.Count}}}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{ScientificTypesBase.Continuous}}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":target_in_fit" = "`true`" +":is_pure_julia" = "`true`" +":package_name" = "GLM" +":package_license" = "MIT" +":load_path" = "MLJGLMInterface.LinearRegressor" +":package_uuid" = 
"38e38edf-8417-5370-95a0-9cbb8c7f171a" +":package_url" = "https://github.com/JuliaStats/GLM.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`true`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = """```\nLinearRegressor\n```\n\nA model type for constructing a linear regressor, based on [GLM.jl](https://github.com/JuliaStats/GLM.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nLinearRegressor = @load LinearRegressor pkg=GLM\n```\n\nDo `model = LinearRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `LinearRegressor(fit_intercept=...)`.\n\n`LinearRegressor` assumes the target is a continuous variable whose conditional distribution is normal with constant variance, and whose expected value is a linear combination of the features (identity link function). Options exist to specify an intercept or offset feature.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with one of:\n\n```\nmach = machine(model, X, y)\nmach = machine(model, X, y, w)\n```\n\nHere\n\n * `X`: is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check the scitype with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n * `w`: is a vector of `Real` per-observation weights\n\n# Hyper-parameters\n\n * `fit_intercept=true`: Whether to calculate the intercept for this model. If set to false, no intercept will be calculated (e.g. the data is expected to be centered)\n * `dropcollinear=false`: Whether to drop features in the training data to ensure linear independence. If true , only the first of each set of linearly-dependent features is used. The coefficient for redundant linearly dependent features is `0.0` and all associated statistics are set to `NaN`.\n * `offsetcol=nothing`: Name of the column to be used as an offset, if any. An offset is a variable which is known to have a coefficient of 1.\n * `report_keys`: `Vector` of keys for the report. Possible keys are: `:deviance`, `:dof_residual`, `:stderror`, `:vcov`, `:coef_table` and `:glm_model`. By default only `:glm_model` is excluded.\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given new features `Xnew` having the same Scitype as `X` above. Predictions are probabilistic.\n * `predict_mean(mach, Xnew)`: instead return the mean of each prediction above\n * `predict_median(mach, Xnew)`: instead return the median of each prediction above.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `features`: The names of the features encountered during model fitting.\n * `coef`: The linear coefficients determined by the model.\n * `intercept`: The intercept determined by the model.\n\n# Report\n\nWhen all keys are enabled in `report_keys`, the following fields are available in `report(mach)`:\n\n * `deviance`: Measure of deviance of fitted model with respect to a perfectly fitted model. 
For a linear model, this is the weighted residual sum of squares\n * `dof_residual`: The degrees of freedom for residuals, when meaningful.\n * `stderror`: The standard errors of the coefficients.\n * `vcov`: The estimated variance-covariance matrix of the coefficient estimates.\n * `coef_table`: Table which displays coefficients and summarizes their significance and confidence intervals.\n * `glm_model`: The raw fitted model returned by `GLM.lm`. Note this points to training data. Refer to the GLM.jl documentation for usage.\n\n# Examples\n\n```\nusing MLJ\nLinearRegressor = @load LinearRegressor pkg=GLM\nglm = LinearRegressor()\n\nX, y = make_regression(100, 2) # synthetic data\nmach = machine(glm, X, y) |> fit!\n\nXnew, _ = make_regression(3, 2)\nyhat = predict(mach, Xnew) # new predictions\nyhat_point = predict_mean(mach, Xnew) # new predictions\n\nfitted_params(mach).features\nfitted_params(mach).coef # x1, x2, intercept\nfitted_params(mach).intercept\n\nreport(mach)\n```\n\nSee also [`LinearCountRegressor`](@ref), [`LinearBinaryClassifier`](@ref)\n""" +":name" = "LinearRegressor" +":human_name" = "linear regressor" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [":clean!", ":fit", ":fitted_params", ":predict", ":predict_mean"] +":hyperparameters" = "`(:fit_intercept, :dropcollinear, :offsetcol, :report_keys)`" +":hyperparameter_types" = "`(\"Bool\", \"Bool\", \"Union{Nothing, Symbol}\", \"Union{Nothing, AbstractVector{Symbol}}\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" +":constructor" = "`nothing`" + +[MLJFlux.EntityEmbedder] ":input_scitype" = "`ScientificTypesBase.Unknown`" ":output_scitype" = "`ScientificTypesBase.Unknown`" ":target_scitype" = "`ScientificTypesBase.Unknown`" @@ -8685,11 +8685,11 @@ ":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" ":target_in_fit" = "`true`" ":is_pure_julia" = "`true`" -":package_name" = "MLJTransforms" +":package_name" = "MLJFlux" ":package_license" = "unknown" -":load_path" = "MLJTransforms.EntityEmbedder" -":package_uuid" = "23777cdb-d90c-4eb0-a694-7c2b83d5c1d6" -":package_url" = "https://github.com/JuliaAI/MLJTransforms.jl" +":load_path" = "MLJFlux.EntityEmbedder" +":package_uuid" = "094fc8d1-fd35-5302-93ea-dabda2abf845" +":package_url" = "https://github.com/FluxML/MLJFlux.jl" ":is_wrapper" = "`true`" ":supports_weights" = "`false`" ":supports_class_weights" = "`false`" diff --git a/src/registry/Models.toml b/src/registry/Models.toml index 1de1c8a..76ddb7e 100644 --- a/src/registry/Models.toml +++ b/src/registry/Models.toml @@ -1,5 +1,4 @@ BetaML = ["RandomForestRegressor", "GaussianMixtureImputer", "RandomForestClassifier", "RandomForestImputer", "PerceptronClassifier", "AutoEncoder", "DecisionTreeRegressor", "PegasosClassifier", "KMeansClusterer", "NeuralNetworkRegressor", "MultitargetGaussianMixtureRegressor", "GaussianMixtureRegressor", "MultitargetNeuralNetworkRegressor", "DecisionTreeClassifier", "GeneralImputer", "NeuralNetworkClassifier", "SimpleImputer", "GaussianMixtureClusterer", "KernelPerceptronClassifier", "KMedoidsClusterer"] -MLJEnsembles = ["EnsembleModel"] CatBoost = ["CatBoostRegressor", "CatBoostClassifier"] NearestNeighborModels = ["KNNClassifier", "MultitargetKNNClassifier", 
"MultitargetKNNRegressor", "KNNRegressor"] MLJScikitLearnInterface = ["ProbabilisticSGDClassifier", "RidgeCVClassifier", "LogisticClassifier", "RandomForestRegressor", "ElasticNetCVRegressor", "PerceptronClassifier", "MultiTaskLassoRegressor", "LinearRegressor", "HDBSCAN", "DBSCAN", "RidgeRegressor", "LassoLarsICRegressor", "ARDRegressor", "SVMNuRegressor", "RidgeClassifier", "SGDRegressor", "ComplementNBClassifier", "HuberRegressor", "SVMNuClassifier", "GradientBoostingClassifier", "GaussianProcessRegressor", "SVMLinearRegressor", "LarsRegressor", "MeanShift", "HistGradientBoostingClassifier", "AdaBoostRegressor", "AffinityPropagation", "MultiTaskLassoCVRegressor", "OrthogonalMatchingPursuitRegressor", "BernoulliNBClassifier", "PassiveAggressiveClassifier", "RidgeCVRegressor", "SVMRegressor", "GaussianNBClassifier", "ExtraTreesClassifier", "KMeans", "MultiTaskElasticNetCVRegressor", "LassoLarsCVRegressor", "OrthogonalMatchingPursuitCVRegressor", "AdaBoostClassifier", "PassiveAggressiveRegressor", "BayesianRidgeRegressor", "GaussianProcessClassifier", "BaggingClassifier", "OPTICS", "RANSACRegressor", "KNeighborsRegressor", "HistGradientBoostingRegressor", "MiniBatchKMeans", "LassoCVRegressor", "DummyRegressor", "BisectingKMeans", "LassoLarsRegressor", "LarsCVRegressor", "KNeighborsClassifier", "SVMLinearClassifier", "FeatureAgglomeration", "DummyClassifier", "BaggingRegressor", "BayesianQDA", "BayesianLDA", "SGDClassifier", "TheilSenRegressor", "SpectralClustering", "Birch", "AgglomerativeClustering", "ElasticNetRegressor", "RandomForestClassifier", "LogisticCVClassifier", "MultiTaskElasticNetRegressor", "ExtraTreesRegressor", "LassoRegressor", "MultinomialNBClassifier", "GradientBoostingRegressor", "SVMClassifier"] @@ -33,5 +32,5 @@ SelfOrganizingMaps = ["SelfOrganizingMap"] LIBSVM = ["SVC", "EpsilonSVR", "LinearSVC", "ProbabilisticSVC", "NuSVR", "NuSVC", "ProbabilisticNuSVC", "OneClassSVM"] TSVD = ["TSVDTransformer"] GLM = ["LinearBinaryClassifier", "LinearCountRegressor", "LinearRegressor"] -MLJTransforms = ["EntityEmbedder"] -MLJFlux = ["MultitargetNeuralNetworkRegressor", "NeuralNetworkClassifier", "ImageClassifier", "NeuralNetworkBinaryClassifier", "NeuralNetworkRegressor"] +MLJFlux = ["EntityEmbedder", "MultitargetNeuralNetworkRegressor", "NeuralNetworkClassifier", "ImageClassifier", "NeuralNetworkBinaryClassifier", "NeuralNetworkRegressor"] +MLJEnsembles = ["EnsembleModel"] From 66f8b68a4dac4fc7b6e3dd2037cfb99fd51b995a Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Sun, 2 Mar 2025 21:38:36 +1300 Subject: [PATCH 2/2] bump 0.17.8 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index cd0cb0a..322ed9c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJModels" uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7" authors = ["Anthony D. Blaom "] -version = "0.17.7" +version = "0.17.8" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"