diff --git a/Project.toml b/Project.toml index 315d615..aeedc6a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "MLJModels" uuid = "d491faf4-2d78-11e9-2867-c94bc002c0b7" authors = ["Anthony D. Blaom "] -version = "0.18.2" +version = "0.18.3" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" diff --git a/src/builtins/Constant.jl b/src/builtins/Constant.jl index e546307..7849979 100644 --- a/src/builtins/Constant.jl +++ b/src/builtins/Constant.jl @@ -24,23 +24,41 @@ MLJModelInterface.predict(::ConstantRegressor, fitresult, Xnew) = fill(fitresult, nrows(Xnew)) ## -## THE CONSTANT DETERMINISTIC REGRESSOR (FOR TESTING) +## THE CONSTANT DETERMINISTIC REGRESSOR ## +# helpers: +_mean(y) = _mean(y, scitype(y)) +_mean(y, ::Type{<:AbstractArray}) = mean(y, dims=1) +_mean(y, ::Type{<:Table}) = _mean(Tables.matrix(y), AbstractArray) +_materializer(y) = _materializer(y, scitype(y)) +_materializer(y, ::Type{<:AbstractMatrix}) = identity +_materializer(y, ::Type{<:AbstractVector}) = vec +function _materializer(y, ::Type{<:Table}) + names = Tables.columnnames(Tables.columntable(y)) + Tables.materializer(y)∘(matrix->Tables.table(matrix; header=names)) +end + struct DeterministicConstantRegressor <: Deterministic end function MLJModelInterface.fit(::DeterministicConstantRegressor, verbosity::Int, X, y) - fitresult = mean(y) + μ = _mean(y) + materializer = _materializer(y) + fitresult = (; μ, materializer) cache = nothing report = NamedTuple() return fitresult, cache, report end MLJModelInterface.predict(::DeterministicConstantRegressor, fitresult, Xnew) = - fill(fitresult, nrows(Xnew)) + hcat([fill(fitresult.μ[i], nrows(Xnew)) for i in eachindex(fitresult.μ)]...) 
|> + fitresult.materializer + +MLJModelInterface.fitted_params(model::DeterministicConstantRegressor, fitresult) = + (; mean=fitresult.μ) ## ## THE CONSTANT CLASSIFIER @@ -115,7 +133,11 @@ metadata_model( metadata_model( DeterministicConstantRegressor, input_scitype = Table, - target_scitype = AbstractVector{Continuous}, + target_scitype = Union{ + AbstractMatrix{Continuous}, + AbstractVector{Continuous}, + Table, + }, supports_weights = false, load_path = "MLJModels.DeterministicConstantRegressor" ) @@ -150,6 +172,9 @@ mean or median values instead. If not specified, a normal distribution is fit. Almost any reasonable model is expected to outperform `ConstantRegressor` which is used almost exclusively for testing and establishing performance baselines. +If you need a multitarget dummy regressor, consider using `DeterministicConstantRegressor` +instead. + In MLJ (or MLJModels) do `model = ConstantRegressor()` or `model = ConstantRegressor(distribution=...)` to construct a model instance. @@ -211,6 +236,67 @@ See also """ ConstantRegressor +""" + DeterministicConstantRegressor + +This "dummy" predictor always makes the same prediction, irrespective of the provided +input pattern, namely the mean value of the training target values. (Its counterpart, +`ConstantRegressor`, makes probabilistic predictions.) This model handles multitargets, +i.e., the training target can be a matrix or a table (with rows as observations). + +Almost any reasonable model is expected to outperform `DeterministicConstantRegressor` +which is used almost exclusively for testing and establishing performance baselines. + +In MLJ, do `model = DeterministicConstantRegressor()` to construct a model instance. 
+ + +# Training data + +In MLJ (or MLJBase) bind an instance `model` to data with + + mach = machine(model, X, y) + +Here: + +- `X` is any table of input features (eg, a `DataFrame`) + +- `y` is the target, which can be any `AbstractVector`, `AbstractMatrix` or table whose + element scitype is `Continuous`; check the scitype with `scitype(y)` or, for tables, with + `schema(y)` + +Train the machine using `fit!(mach, rows=...)`. + +# Operations + +- `predict(mach, Xnew)`: Return predictions of the target given + features `Xnew` (which for this model are ignored). + +# Fitted parameters + +The fields of `fitted_params(mach)` are: + +- `mean`: The target mean(s). Always a row vector, i.e., an `AbstractMatrix` with a single row. + +# Examples + +```julia +using MLJ + +X, y = make_regression(10, 2; n_targets=3); # synthetic data: two tables +regressor = DeterministicConstantRegressor(); +mach = machine(regressor, X, y) |> fit!; + +fitted_params(mach) + +Xnew, _ = make_regression(3, 2) +predict(mach, Xnew) + +``` +See also +[`ConstantClassifier`](@ref) +""" +DeterministicConstantRegressor + """ ConstantClassifier diff --git a/test/builtins/Constant.jl b/test/builtins/Constant.jl index 2636abd..23eebee 100644 --- a/test/builtins/Constant.jl +++ b/test/builtins/Constant.jl @@ -1,12 +1,13 @@ module TestConstant using Test, MLJModels, CategoricalArrays -import Distributions, MLJBase +import Distributions, MLJBase, Tables + # Any X will do for constant models: X = NamedTuple{(:x1,:x2,:x3)}((rand(10), rand(10), rand(10))) -@testset "Regressor" begin +@testset "ConstantRegressor" begin y = [1.0, 1.0, 2.0, 2.0] model = ConstantRegressor(distribution_type= @@ -25,7 +26,40 @@ X = NamedTuple{(:x1,:x2,:x3)}((rand(10), rand(10), rand(10))) @test MLJBase.load_path(model) == "MLJModels.ConstantRegressor" end -@testset "Classifier" begin +@testset "DeterministicConstantRegressor" begin + + X = (; x=ones(3)) + S = MLJBase.target_scitype(DeterministicConstantRegressor()) + + # vector target: 
+ y = Float64[2, 3, 4] + @test MLJBase.scitype(y) <: S + mach = MLJBase.machine(MLJModels.DeterministicConstantRegressor(), X, y) + MLJBase.fit!(mach, verbosity=0) + @test MLJBase.predict(mach, X) ≈ [3, 3, 3] + @test only(MLJBase.fitted_params(mach).mean) ≈ 3 + + # matrix target: + y = Float64[2 5; 3 6; 4 7] + @test MLJBase.scitype(y) <: S + mach = MLJBase.machine(MLJModels.DeterministicConstantRegressor(), X, y) + MLJBase.fit!(mach, verbosity=0) + @test MLJBase.predict(mach, X) ≈ [3 6; 3 6; 3 6] + @test MLJBase.fitted_params(mach).mean ≈ [3 6] + + # tabular target: + y = Tables.table(Float64[2 5; 3 6; 4 7], header=[:x, :y]) |> Tables.rowtable + @test MLJBase.scitype(y) <: S + mach = MLJBase.machine(MLJModels.DeterministicConstantRegressor(), X, y) + MLJBase.fit!(mach, verbosity=0) + yhat = MLJBase.predict(mach, X) + @test yhat isa Vector{<:NamedTuple} + @test keys(yhat[1]) == (:x, :y) + @test Tables.matrix(yhat) ≈ [3 6; 3 6; 3 6] + @test MLJBase.fitted_params(mach).mean ≈ [3 6] +end + +@testset "ConstantClassifier" begin yraw = ["Perry", "Antonia", "Perry", "Skater"] y = categorical(yraw)