@@ -15,10 +15,6 @@ using StableRNGs
15
15
# replacement). In particular this algorithm has an iteration parameter `n`, and we
16
16
# implement `update` for warm restarts when `n` increases.
17
17
18
- # By re-using the data interface for `Ridge`, we ensure that the resampling (bagging) is
19
- # more efficient (no repeated table -> matrix conversions, and we resample matrices
20
- # directly, not the tables).
21
-
22
18
# no docstring here - that goes with the constructor
23
19
struct RidgeEnsemble
24
20
lambda:: Float64
44
40
45
41
# Return the `algorithm` field stored on a fitted ensemble.
function LearnAPI.algorithm(model::RidgeEnsembleFitted)
    return model.algorithm
end
46
42
47
- # we use the same data interface we provided for `Ridge` in regression.jl:
43
+ # We add the same data interface we provided for `Ridge` in regression.jl. This is an
44
+ # optional step on which the later code does not depend.
48
45
# `obs` for the ensemble algorithm: delegate to the `obs` method of a
# default-constructed `Ridge`, so both share one data interface.
function LearnAPI.obs(algorithm::RidgeEnsemble, data)
    return LearnAPI.obs(Ridge(), data)
end
49
46
# `obs` for a fitted ensemble: delegate to the `obs` method of the first model
# stored in `model.models`.
function LearnAPI.obs(model::RidgeEnsembleFitted, data)
    atomic_model = first(model.models)
    return LearnAPI.obs(atomic_model, data)
end
50
47
# `target` for the ensemble algorithm: delegate to the `target` method of a
# default-constructed `Ridge`.
function LearnAPI.target(algorithm::RidgeEnsemble, data)
    return LearnAPI.target(Ridge(), data)
end
51
48
# `features` for the ensemble algorithm: delegate to the `features` method of a
# default-constructed `Ridge`. The method must dispatch on `RidgeEnsemble`, like the
# sibling `obs` and `target` methods; dispatching on `Ridge` would redefine the `Ridge`
# method in terms of itself (unbounded recursion) and leave `RidgeEnsemble` without one.
LearnAPI.features(algorithm::RidgeEnsemble, data) = LearnAPI.features(Ridge(), data)
52
49
53
- function d (rng)
54
- i = digits (rng. state)
55
- m = min (length (i), 4 )
56
- tail = i[end - m + 1 : end ]
57
- println (join (string .(tail)))
58
- end
59
-
60
- # because we need observation subsampling, we first implement `fit` for output of
61
- # `obs`:
62
- function LearnAPI. fit (algorithm:: RidgeEnsemble , data:: RidgeFitObs ; verbosity= 1 )
50
+ function LearnAPI. fit (algorithm:: RidgeEnsemble , data; verbosity= 1 )
63
51
64
52
# unpack hyperparameters:
65
53
lambda = algorithm. lambda
@@ -69,16 +57,21 @@ function LearnAPI.fit(algorithm::RidgeEnsemble, data::RidgeFitObs; verbosity=1)
69
57
# instantiate atomic algorithm:
70
58
atom = Ridge (lambda)
71
59
60
+ # ensure data can be subsampled using MLUtils.jl, and that we're feeding the atomic
61
+ # `fit` data in an efficient (pre-processed) form:
62
+
63
+ observations = obs (atom, data)
64
+
72
65
# initialize ensemble:
73
66
models = []
74
67
75
68
# get number of observations:
76
- N = MLUtils. numobs (data )
69
+ N = MLUtils. numobs (observations )
77
70
78
71
# train the ensemble:
79
72
for _ in 1 : n
80
73
bag = rand (rng, 1 : N, N)
81
- data_subset = MLUtils. getobs (data , bag)
74
+ data_subset = MLUtils. getobs (observations , bag)
82
75
# step down one verbosity level in atomic fit:
83
76
model = fit (atom, data_subset; verbosity= verbosity - 1 )
84
77
push! (models, model)
@@ -91,21 +84,11 @@ function LearnAPI.fit(algorithm::RidgeEnsemble, data::RidgeFitObs; verbosity=1)
91
84
92
85
end
93
86
94
- # ... and so need a `fit` for unprocessed `data = (X, y)`:
95
- LearnAPI. fit (algorithm:: RidgeEnsemble , data; kwargs... ) =
96
- fit (algorithm, obs (algorithm, data); kwargs... )
97
-
98
87
# If `n` is increased, this `update` adds new regressors to the ensemble, including any
99
88
# new hyperparameter updates (e.g., `lambda`) when computing the new
100
89
# regressors. Otherwise, update is equivalent to retraining from scratch, with the
101
90
# provided hyperparameter updates.
102
- function LearnAPI. update (
103
- model:: RidgeEnsembleFitted ,
104
- data:: RidgeFitObs ;
105
- verbosity= 1 ,
106
- replacements... ,
107
- )
108
-
91
+ function LearnAPI. update (model:: RidgeEnsembleFitted , data; verbosity= 1 , replacements... )
109
92
:n in keys (replacements) || return fit (model, data)
110
93
111
94
algorithm_old = LearnAPI. algorithm (model)
@@ -114,24 +97,18 @@ function LearnAPI.update(
114
97
Δn = n - algorithm_old. n
115
98
n < 0 && return fit (model, algorithm)
116
99
117
- # get number of observations:
118
- N = MLUtils. numobs (data)
100
+ atom = Ridge (; lambda= algorithm. lambda)
101
+ observations = obs (atom, data)
102
+ N = MLUtils. numobs (observations)
119
103
120
104
# initialize:
121
105
models = model. models
122
106
rng = model. rng # as mutated in previous `fit`/`update` calls
123
107
124
- atom = Ridge (; lambda= algorithm. lambda)
125
-
126
- rng2 = StableRNG (123 )
127
- for _ in 1 : 10
128
- rand (rng2)
129
- end
130
-
131
108
# add new regressors to the ensemble:
132
109
for _ in 1 : Δn
133
110
bag = rand (rng, 1 : N, N)
134
- data_subset = MLUtils. getobs (data , bag)
111
+ data_subset = MLUtils. getobs (observations , bag)
135
112
model = fit (atom, data_subset; verbosity= verbosity- 1 )
136
113
push! (models, model)
137
114
end
@@ -142,13 +119,6 @@ function LearnAPI.update(
142
119
return RidgeEnsembleFitted (algorithm, atom, rng, models)
143
120
end
144
121
145
- # an `update` for unprocessed `data = (X, y)`:
146
- LearnAPI. update (model:: RidgeEnsembleFitted , data; kwargs... ) =
147
- update (model, obs (LearnAPI. algorithm (model), data); kwargs... )
148
-
149
- # `data` here can be pre-processed or not, because we're just calling the atomic
150
- # `predict`, which already has a data interface, and we don't need any subsampling, like
151
- # we did for `fit`:
152
122
LearnAPI. predict (model:: RidgeEnsembleFitted , :: Point , data) =
153
123
mean (model. models) do atomic_model
154
124
predict (atomic_model, Point (), data)
@@ -221,115 +191,6 @@ Xtest = Tables.subset(X, test)
221
191
model = fit (LearnAPI. clone (algorithm; n= 7 ), Xtrain, y[train]; verbosity= 0 );
222
192
@test ŷ7 ≈ predict (model, Xtest)
223
193
224
-
225
- update (model, Xtest;
226
- fitobs = LearnAPI. obs (algorithm, data)
227
- predictobs = LearnAPI. obs (model, X)
228
- model = fit (algorithm, MLUtils. getobs (fitobs, train); verbosity= 0 )
229
- @test LearnAPI. target (algorithm, fitobs) == y
230
- @test predict (model, Point (), MLUtils. getobs (predictobs, test)) ≈ ŷ
231
- @test predict (model, LearnAPI. features (algorithm, fitobs)) ≈ predict (model, X)
232
-
233
- @test LearnAPI. feature_importances (model) isa Vector{<: Pair{Symbol} }
234
-
235
- filename = tempname ()
236
- using Serialization
237
- small_model = minimize (model)
238
- serialize (filename, small_model)
239
-
240
- recovered_model = deserialize (filename)
241
- @test LearnAPI. algorithm (recovered_model) == algorithm
242
- @test predict (
243
- recovered_model,
244
- Point (),
245
- MLUtils. getobs (predictobs, test)
246
- ) ≈ ŷ
247
-
248
- end
249
-
250
- # # VARIATION OF RIDGE REGRESSION THAT USES FALLBACK OF LearnAPI.obs
251
-
252
- # no docstring here - that goes with the constructor
253
- struct BabyRidge
254
- lambda:: Float64
255
- end
256
-
257
- """
258
- BabyRidge(; lambda=0.1)
259
-
260
- Instantiate a ridge regression algorithm, with regularization of `lambda`.
261
-
262
- """
263
- BabyRidge (; lambda= 0.1 ) = BabyRidge (lambda) # LearnAPI.constructor defined later
264
-
265
- struct BabyRidgeFitted{T,F}
266
- algorithm:: BabyRidge
267
- coefficients:: Vector{T}
268
- feature_importances:: F
269
- end
270
-
271
- function LearnAPI. fit (algorithm:: BabyRidge , data; verbosity= 1 )
272
-
273
- X, y = data
274
-
275
- lambda = algorithm. lambda
276
- table = Tables. columntable (X)
277
- names = Tables. columnnames (table) |> collect
278
- A = Tables. matrix (table)'
279
-
280
- # apply core algorithm:
281
- coefficients = (A* A' + algorithm. lambda* I)\ (A* y) # vector
282
-
283
- feature_importances = nothing
284
-
285
- return BabyRidgeFitted (algorithm, coefficients, feature_importances)
286
-
287
- end
288
-
289
- # extracting stuff from training data:
290
- LearnAPI. target (:: BabyRidge , data) = last (data)
291
-
292
- LearnAPI. algorithm (model:: BabyRidgeFitted ) = model. algorithm
293
-
294
- LearnAPI. predict (model:: BabyRidgeFitted , :: Point , Xnew) =
295
- Tables. matrix (Xnew)* model. coefficients
296
-
297
- LearnAPI. minimize (model:: BabyRidgeFitted ) =
298
- BabyRidgeFitted (model. algorithm, model. coefficients, nothing )
299
-
300
- @trait (
301
- BabyRidge,
302
- constructor = BabyRidge,
303
- kinds_of_proxy = (Point (),),
304
- tags = (" regression" ,),
305
- functions = (
306
- :(LearnAPI. fit),
307
- :(LearnAPI. algorithm),
308
- :(LearnAPI. minimize),
309
- :(LearnAPI. obs),
310
- :(LearnAPI. features),
311
- :(LearnAPI. target),
312
- :(LearnAPI. predict),
313
- :(LearnAPI. feature_importances),
314
- )
315
- )
316
-
317
- @testset " test a variation which does not overload LearnAPI.obs" begin
318
- algorithm = BabyRidge (lambda= 0.5 )
319
- @test
320
-
321
- model = fit (algorithm, Tables. subset (X, train), y[train]; verbosity= 0 )
322
- ŷ = predict (model, Point (), Tables. subset (X, test))
323
- @test ŷ isa Vector{Float64}
324
-
325
- fitobs = obs (algorithm, data)
326
- predictobs = LearnAPI. obs (model, X)
327
- model = fit (algorithm, MLUtils. getobs (fitobs, train); verbosity= 0 )
328
- @test predict (model, Point (), MLUtils. getobs (predictobs, test)) == ŷ ==
329
- predict (model, MLUtils. getobs (predictobs, test))
330
- @test LearnAPI. target (algorithm, data) == y
331
- @test LearnAPI. predict (model, X) ≈
332
- LearnAPI. predict (model, LearnAPI. features (algorithm, data))
333
194
end
334
195
335
196
true
0 commit comments