using LearnAPI
using LinearAlgebra
using Tables
import MLUtils
import DataFrames
using Random
using Statistics
using StableRNGs
using Test
using Serialization

# # ENSEMBLE OF RIDGE REGRESSORS

# We implement a toy algorithm that creates a bagged ensemble of ridge regressors (as
# defined already in test/integration/regressors.jl), i.e., where each atomic model is
# trained on a random sample of the training observations (same number, but sampled with
# replacement). In particular, this algorithm has an iteration parameter `n`, and we
# implement `update` for warm restarts when `n` increases.

# By re-using the data interface for `Ridge`, we ensure that the resampling (bagging) is
# efficient: there are no repeated table -> matrix conversions, because we resample the
# matrices directly, not the tables.
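
# To illustrate the subsampling pattern used in `fit` below (a standalone sketch; the
# names `rng_demo` and `v` are illustration-only): a "bag" is a vector of indices sampled
# with replacement, and `MLUtils.getobs` extracts the corresponding observations:
let rng_demo = StableRNG(42), v = collect(1:5)
    bag = rand(rng_demo, 1:MLUtils.numobs(v), MLUtils.numobs(v))
    @assert MLUtils.numobs(MLUtils.getobs(v, bag)) == MLUtils.numobs(v)
end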

# no docstring here - that goes with the constructor
struct RidgeEnsemble
    lambda::Float64
    rng # leaving abstract for simplicity
    n::Int
end

"""
    RidgeEnsemble(; lambda=0.1, rng=Random.default_rng(), n=10)

Instantiate a bagged ensemble of `n` ridge regressors, with regularization parameter
`lambda`. Each atomic model is trained on a bootstrap sample of the data, drawn using
`rng`.

"""
RidgeEnsemble(; lambda=0.1, rng=Random.default_rng(), n=10) =
    RidgeEnsemble(lambda, rng, n) # LearnAPI.constructor defined later

struct RidgeEnsembleFitted
    algorithm::RidgeEnsemble
    atom::Ridge
    rng # mutated copy of `algorithm.rng`
    models # leaving type abstract for simplicity
end

LearnAPI.algorithm(model::RidgeEnsembleFitted) = model.algorithm

# we use the same data interface we provided for `Ridge` in regression.jl:
LearnAPI.obs(algorithm::RidgeEnsemble, data) = LearnAPI.obs(Ridge(), data)
LearnAPI.obs(model::RidgeEnsembleFitted, data) = LearnAPI.obs(first(model.models), data)
LearnAPI.target(algorithm::RidgeEnsemble, data) = LearnAPI.target(Ridge(), data)
LearnAPI.features(algorithm::RidgeEnsemble, data) = LearnAPI.features(Ridge(), data)

# because we need observation subsampling, we first implement `fit` for output of
# `obs`:
function LearnAPI.fit(algorithm::RidgeEnsemble, data::RidgeFitObs; verbosity=1)

    # unpack hyperparameters:
    lambda = algorithm.lambda
    rng = deepcopy(algorithm.rng) # to prevent mutation of `algorithm`
    n = algorithm.n

    # instantiate atomic algorithm:
    atom = Ridge(lambda)

    # initialize ensemble:
    models = []

    # get number of observations:
    N = MLUtils.numobs(data)

    # train the ensemble:
    for _ in 1:n
        bag = rand(rng, 1:N, N)
        data_subset = MLUtils.getobs(data, bag)
        # step down one verbosity level in atomic fit:
        model = fit(atom, data_subset; verbosity=verbosity - 1)
        push!(models, model)
    end

    # make some noise, if allowed:
    verbosity > 0 && @info "Trained $n ridge regression models. "

    return RidgeEnsembleFitted(algorithm, atom, rng, models)

end

# ... and so need a `fit` for unprocessed `data = (X, y)`:
LearnAPI.fit(algorithm::RidgeEnsemble, data; kwargs...) =
    fit(algorithm, obs(algorithm, data); kwargs...)
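
# So both of these calls now work (usage sketch; `algorithm` a `RidgeEnsemble`, and
# `X`, `y` training data as defined further below):
#
#     fit(algorithm, (X, y))                     # unprocessed data
#     fit(algorithm, obs(algorithm, (X, y)))     # pre-processed observations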

# If `n` is increased, this `update` adds new regressors to the ensemble, applying any
# new hyperparameter updates (e.g., `lambda`) when computing the new
# regressors. Otherwise, `update` is equivalent to retraining from scratch, with the
# provided hyperparameter updates.
function LearnAPI.update(
    model::RidgeEnsembleFitted,
    data::RidgeFitObs;
    verbosity=1,
    replacements...,
    )

    # if `n` is not being updated, retrain from scratch with the updated algorithm:
    :n in keys(replacements) || return fit(
        LearnAPI.clone(LearnAPI.algorithm(model); replacements...),
        data;
        verbosity,
    )

    algorithm_old = LearnAPI.algorithm(model)
    algorithm = LearnAPI.clone(algorithm_old; replacements...)
    n = algorithm.n
    Δn = n - algorithm_old.n

    # if the ensemble is to shrink, retrain from scratch:
    Δn < 0 && return fit(algorithm, data; verbosity)

    # get number of observations:
    N = MLUtils.numobs(data)

    # initialize:
    models = model.models
    rng = model.rng # as mutated in previous `fit`/`update` calls

    atom = Ridge(; lambda=algorithm.lambda)

    # add new regressors to the ensemble:
    for _ in 1:Δn
        bag = rand(rng, 1:N, N)
        data_subset = MLUtils.getobs(data, bag)
        model = fit(atom, data_subset; verbosity=verbosity-1)
        push!(models, model)
    end

    # make some noise, if allowed:
    verbosity > 0 && @info "Trained $Δn additional ridge regression models. "

    return RidgeEnsembleFitted(algorithm, atom, rng, models)
end

# an `update` for unprocessed `data = (X, y)`:
LearnAPI.update(model::RidgeEnsembleFitted, data; kwargs...) =
    update(model, obs(LearnAPI.algorithm(model), data); kwargs...)
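
# For example (hypothetical usage sketch, assuming `data` is suitable training data):
# growing an ensemble from 10 to 15 members trains only 5 new atomic models, while
# changing only `lambda` triggers retraining from scratch:
#
#     model = fit(RidgeEnsemble(n=10), data)
#     model = update(model, data; n=15)          # warm restart: 5 new atomic models
#     model = update(model, data; lambda=0.05)   # cold restart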

# `data` here can be pre-processed or not, because we're just calling the atomic
# `predict`, which already has a data interface, and we don't need any subsampling, like
# we did for `fit`:
LearnAPI.predict(model::RidgeEnsembleFitted, ::Point, data) =
    mean(model.models) do atomic_model
        predict(atomic_model, Point(), data)
    end

LearnAPI.minimize(model::RidgeEnsembleFitted) = RidgeEnsembleFitted(
    model.algorithm,
    model.atom,
    model.rng,
    minimize.(model.models),
)
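
# The trait declaration below advertises `LearnAPI.feature_importances`, and the first
# testset expects a `Vector{<:Pair{Symbol}}`. A minimal sketch, under the assumption
# that the atomic `Ridge` models implement `LearnAPI.feature_importances` returning
# `Symbol => Float64` pairs (as in regression.jl): average the importances across the
# ensemble.
function LearnAPI.feature_importances(model::RidgeEnsembleFitted)
    dicts = [Dict(LearnAPI.feature_importances(m)) for m in model.models]
    features = keys(first(dicts))
    return [feature => mean(d[feature] for d in dicts) for feature in features]
end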

# note the inclusion of `iteration_parameter`:
@trait(
    RidgeEnsemble,
    constructor = RidgeEnsemble,
    iteration_parameter = :n,
    kinds_of_proxy = (Point(),),
    tags = ("regression", "ensemble algorithms", "iterative models"),
    functions = (
        :(LearnAPI.fit),
        :(LearnAPI.algorithm),
        :(LearnAPI.minimize),
        :(LearnAPI.obs),
        :(LearnAPI.features),
        :(LearnAPI.target),
        :(LearnAPI.update),
        :(LearnAPI.predict),
        :(LearnAPI.feature_importances),
    )
)
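
# Generic client code can now discover the warm-restart parameter by querying the trait
# (a quick sanity check; `LearnAPI.iteration_parameter` is the accessor for the trait
# set above):
@assert LearnAPI.iteration_parameter(RidgeEnsemble()) == :n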

# synthetic test data:
N = 10 # number of observations
train = 1:6
test = 7:10
a, b, c = rand(N), rand(N), rand(N)
X = (; a, b, c)
X = DataFrames.DataFrame(X)
y = 2a - b + 3c + 0.05*rand(N)
data = (X, y)
Xtrain = Tables.subset(X, train)
Xtest = Tables.subset(X, test)

@testset "test an implementation of bagged ensemble of ridge regressors" begin
    rng = StableRNG(123)
    algorithm = RidgeEnsemble(lambda=0.5, n=4; rng)
    @test LearnAPI.clone(algorithm) == algorithm
    @test :(LearnAPI.obs) in LearnAPI.functions(algorithm)
    @test LearnAPI.target(algorithm, data) == y
    @test LearnAPI.features(algorithm, data) == X

    model = @test_logs(
        (:info, r"Trained 4 ridge"),
        fit(algorithm, Xtrain, y[train]; verbosity=1),
    );

    ŷ4 = predict(model, Point(), Xtest)
    @test ŷ4 == predict(model, Xtest)

    # add 3 atomic models to the ensemble:
    # model = @test_logs(
    #     (:info, r"Trained 3 additional"),
    #     update(model, Xtrain, y[train]; n=7),
    # )
    model = update(model, Xtrain, y[train]; verbosity=0, n=7);
    ŷ7 = predict(model, Xtest)

    # compare with cold restart:
    model = fit(LearnAPI.clone(algorithm; n=7), Xtrain, y[train]; verbosity=0);
    @test ŷ7 ≈ predict(model, Xtest)

    # repeat the original (n=4) train/predict cycle with pre-processed observations:
    fitobs = LearnAPI.obs(algorithm, data)
    predictobs = LearnAPI.obs(model, X)
    model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0)
    @test LearnAPI.target(algorithm, fitobs) == y
    @test predict(model, Point(), MLUtils.getobs(predictobs, test)) ≈ ŷ4
    @test predict(model, LearnAPI.features(algorithm, fitobs)) ≈ predict(model, X)

    @test LearnAPI.feature_importances(model) isa Vector{<:Pair{Symbol}}

    filename = tempname()
    small_model = minimize(model)
    serialize(filename, small_model)

    recovered_model = deserialize(filename)
    @test LearnAPI.algorithm(recovered_model) == algorithm
    @test predict(
        recovered_model,
        Point(),
        MLUtils.getobs(predictobs, test)
    ) ≈ ŷ4

end

# # VARIATION OF RIDGE REGRESSION THAT USES FALLBACK OF LearnAPI.obs

# no docstring here - that goes with the constructor
struct BabyRidge
    lambda::Float64
end

"""
    BabyRidge(; lambda=0.1)

Instantiate a ridge regression algorithm, with regularization of `lambda`.

"""
BabyRidge(; lambda=0.1) = BabyRidge(lambda) # LearnAPI.constructor defined later
| 264 | + |
| 265 | +struct BabyRidgeFitted{T,F} |
| 266 | + algorithm::BabyRidge |
| 267 | + coefficients::Vector{T} |
| 268 | + feature_importances::F |
| 269 | +end |
| 270 | + |
| 271 | +function LearnAPI.fit(algorithm::BabyRidge, data; verbosity=1) |
| 272 | + |
| 273 | + X, y = data |
| 274 | + |
| 275 | + lambda = algorithm.lambda |
| 276 | + table = Tables.columntable(X) |
| 277 | + names = Tables.columnnames(table) |> collect |
| 278 | + A = Tables.matrix(table)' |
| 279 | + |
| 280 | + # apply core algorithm: |
| 281 | + coefficients = (A*A' + algorithm.lambda*I)\(A*y) # vector |
| 282 | + |
| 283 | + feature_importances = nothing |
| 284 | + |
| 285 | + return BabyRidgeFitted(algorithm, coefficients, feature_importances) |
| 286 | + |
| 287 | +end |
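
# Because `LearnAPI.obs` is not overloaded for `BabyRidge`, the LearnAPI fallback
# applies, which simply returns `data` unchanged (a quick sanity check of that
# assumption):
@assert LearnAPI.obs(BabyRidge(), "any data") == "any data"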

# extracting stuff from training data:
LearnAPI.target(::BabyRidge, data) = last(data)

LearnAPI.algorithm(model::BabyRidgeFitted) = model.algorithm

LearnAPI.predict(model::BabyRidgeFitted, ::Point, Xnew) =
    Tables.matrix(Xnew)*model.coefficients

LearnAPI.minimize(model::BabyRidgeFitted) =
    BabyRidgeFitted(model.algorithm, model.coefficients, nothing)

@trait(
    BabyRidge,
    constructor = BabyRidge,
    kinds_of_proxy = (Point(),),
    tags = ("regression",),
    functions = (
        :(LearnAPI.fit),
        :(LearnAPI.algorithm),
        :(LearnAPI.minimize),
        :(LearnAPI.obs),
        :(LearnAPI.features),
        :(LearnAPI.target),
        :(LearnAPI.predict),
        :(LearnAPI.feature_importances),
    )
)

@testset "test a variation which does not overload LearnAPI.obs" begin
    algorithm = BabyRidge(lambda=0.5)
    @test LearnAPI.clone(algorithm) == algorithm

    model = fit(algorithm, Tables.subset(X, train), y[train]; verbosity=0)
    ŷ = predict(model, Point(), Tables.subset(X, test))
    @test ŷ isa Vector{Float64}

    fitobs = obs(algorithm, data)
    predictobs = LearnAPI.obs(model, X)
    model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0)
    @test predict(model, Point(), MLUtils.getobs(predictobs, test)) == ŷ ==
        predict(model, MLUtils.getobs(predictobs, test))
    @test LearnAPI.target(algorithm, data) == y
    @test LearnAPI.predict(model, X) ≈
        LearnAPI.predict(model, LearnAPI.features(algorithm, data))
end

true