
Commit 7d45e08

other tweaks

1 parent 1530a84 commit 7d45e08

10 files changed: +57 −174 lines changed

ROADMAP.md

Lines changed: 2 additions & 2 deletions
@@ -17,14 +17,14 @@
 - [ ] classification
 - [ ] clustering
 - [ ] gradient descent
-- [ ] iterative algorithms
+- [x] iterative algorithms
 - [ ] incremental algorithms
 - [ ] dimension reduction
 - [x] feature engineering
 - [x] static algorithms
 - [ ] missing value imputation
 - [ ] transformers
-- [ ] ensemble algorithms
+- [x] ensemble algorithms
 - [ ] time series forecasting
 - [ ] time series classification
 - [ ] survival analysis

docs/src/common_implementation_patterns.md

Lines changed: 26 additions & 15 deletions
@@ -20,43 +20,54 @@ Although an implementation is defined purely by the methods and traits it implem
 implementations fall into one (or more) of the following informally understood patterns or
 "tasks":

+- [Regression](@ref): Supervised learners for continuous targets
+
 - [Classification](@ref): Supervised learners for categorical targets

-- [Regression](@ref): Supervised learners for continuous targets
+- [Clusterering](@ref): Algorithms that group data into clusters for classification and
+  possibly dimension reduction. May be true learners (generalize to new data) or static.
+
+- [Gradient Descent](@ref): Including neural networks.

 - [Iterative Algorithms](@ref)

 - [Incremental Algorithms](@ref)

+- [Feature Engineering](@ref): Algorithms for selecting or combining features
+
+- [Dimension Reduction](@ref): Transformers that learn to reduce feature space dimension
+
+- [Missing Value Imputation](@ref)
+
+- [Transformers](@ref): Other transformers, such as standardizers, and categorical
+  encoders.
+
 - [Static Algorithms](@ref): Algorithms that do not learn, in the sense they must be
   re-executed for each new data set (do not generalize), but which have hyperparameters
   and/or deliver ancillary information about the computation.
+
+- [Ensemble Algorithms](@ref): Algorithms that blend predictions of multiple algorithms

-- [Dimension Reduction](@ref): Transformers that learn to reduce feature space dimension
+- [Time Series Forecasting](@ref)

-- [Feature Engineering](@ref)
+- [Time Series Classification](@ref)

-- [Missing Value Imputation](@ref): Transformers that replace missing values.
+- [Survival Analysis](@ref)

-- [Transformers](@ref): Other transformers, such as standardizers, and categorical
-  encoders.
+- [Density Estimation](@ref): Algorithms that learn a probability distribution

-- [Clusterering](@ref): Algorithms that group data into clusters for classification and
-  possibly dimension reduction. May be true learners (generalize to new data) or static.
+- [Bayesian Algorithms](@ref)

 - [Outlier Detection](@ref): Supervised, unsupervised, or semi-supervised learners for
   anomaly detection.

-- [Learning a Probability Distribution](@ref): Algorithms that fit a distribution or
-  distribution-like object to data
-
-- [Time Series Forecasting](@ref)
+- [Text Analysis](@ref)

-- [Time Series Classification](@ref)
+- [Audio Analysis](@ref)

-- [Supervised Bayesian Algorithms](@ref)
+- [Natural Language Processing](@ref)

-- [Survival Analysis](@ref)
+- [Image Processing](@ref)

 - [Meta-algorithms](@ref)

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+# Density Estimation
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+# Ensemble Algorithms
+
+See [this
+example](https://github.com/JuliaAI/LearnAPI.jl/blob/dev/test/integration/iterative_algorithms.jl)
+from tests.
Lines changed: 4 additions & 0 deletions
@@ -1 +1,5 @@
 # Iterative Algorithms
+
+See [this
+example](https://github.com/JuliaAI/LearnAPI.jl/blob/dev/test/integration/iterative_algorithms.jl)
+from tests.

docs/src/patterns/learning_a_probability_distribution.md

Lines changed: 0 additions & 1 deletion
This file was deleted.

docs/src/patterns/transformers.md

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+# Transformers

src/traits.jl

Lines changed: 2 additions & 2 deletions
@@ -195,11 +195,11 @@ tags() = [
     "gradient descent",
     "iterative algorithms",
     "incremental algorithms",
+    "feature engineering",
     "dimension reduction",
+    "missing value imputation",
     "transformers",
-    "feature engineering",
     "static algorithms",
-    "missing value imputation",
     "ensemble algorithms",
     "time series forecasting",
     "time series classification",

test/integration/iterative_algorithms.jl

Lines changed: 15 additions & 154 deletions
@@ -15,10 +15,6 @@ using StableRNGs
 # replacement). In particular this algorithm has an iteration parameter `n`, and we
 # implement `update` for warm restarts when `n` increases.

-# By re-using the data interface for `Ridge`, we ensure that the resampling (bagging) is
-# more efficient (no repeated table -> matrix conversions, and we resample matrices
-# directly, not the tables).
-
 # no docstring here - that goes with the constructor
 struct RidgeEnsemble
     lambda::Float64
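For orientation, the warm-restart workflow this file tests looks roughly as follows. This is a sketch only: it assumes the `RidgeEnsemble` keyword constructor defined (with its docstring) elsewhere in the file, and the training table `Xtrain`, target vector `ytrain`, and new features `Xnew` are hypothetical placeholders.

```julia
using LearnAPI

algorithm = RidgeEnsemble(lambda=0.7, n=10)             # bagged ensemble of 10 ridge regressors
model = fit(algorithm, (Xtrain, ytrain); verbosity=0)   # trains 10 atomic models

# Increasing `n` triggers the warm restart implemented by `update` below: only the five
# additional atomic regressors are trained; the existing ten are re-used.
model = update(model, (Xtrain, ytrain); verbosity=0, n=15)

ŷ = predict(model, Xnew)   # averages the predictions of all 15 atomic models
```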
@@ -44,22 +40,14 @@ end

 LearnAPI.algorithm(model::RidgeEnsembleFitted) = model.algorithm

-# we use the same data interface we provided for `Ridge` in regression.jl:
+# We add the same data interface we provided for `Ridge` in regression.jl. This is an
+# optional step on which the later code does not depend.
 LearnAPI.obs(algorithm::RidgeEnsemble, data) = LearnAPI.obs(Ridge(), data)
 LearnAPI.obs(model::RidgeEnsembleFitted, data) = LearnAPI.obs(first(model.models), data)
 LearnAPI.target(algorithm::RidgeEnsemble, data) = LearnAPI.target(Ridge(), data)
 LearnAPI.features(algorithm::Ridge, data) = LearnAPI.features(Ridge(), data)

-function d(rng)
-    i = digits(rng.state)
-    m = min(length(i), 4)
-    tail = i[end - m + 1:end]
-    println(join(string.(tail)))
-end
-
-# because we need observation subsampling, we first implement `fit` for output of
-# `obs`:
-function LearnAPI.fit(algorithm::RidgeEnsemble, data::RidgeFitObs; verbosity=1)
+function LearnAPI.fit(algorithm::RidgeEnsemble, data; verbosity=1)

     # unpack hyperparameters:
     lambda = algorithm.lambda
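Delegating `obs` like this lets a consumer pre-process the data once and resample it cheaply, which is the pattern exercised by the test code removed further down this diff. A rough consumer-side sketch, with `X`, `y`, and the index vector `train` as hypothetical placeholders and the keyword constructor assumed:

```julia
using LearnAPI, MLUtils

algorithm = RidgeEnsemble(lambda=0.7, n=10)

# convert the table/target pair to the efficient internal representation once, up front:
fitobs = LearnAPI.obs(algorithm, (X, y))

# `fit` accepts the output of `obs`, and `MLUtils.getobs` subsamples it without
# repeating the table -> matrix conversion:
model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0)
```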
@@ -69,16 +57,21 @@ function LearnAPI.fit(algorithm::RidgeEnsemble, data::RidgeFitObs; verbosity=1)
     # instantiate atomic algorithm:
     atom = Ridge(lambda)

+    # ensure data can be subsampled using MLUtils.jl, and that we're feeding the atomic
+    # `fit` data in an efficient (pre-processed) form:
+
+    observations = obs(atom, data)
+
     # initialize ensemble:
     models = []

     # get number of observations:
-    N = MLUtils.numobs(data)
+    N = MLUtils.numobs(observations)

     # train the ensemble:
     for _ in 1:n
         bag = rand(rng, 1:N, N)
-        data_subset = MLUtils.getobs(data, bag)
+        data_subset = MLUtils.getobs(observations, bag)
         # step down one verbosity level in atomic fit:
         model = fit(atom, data_subset; verbosity=verbosity - 1)
         push!(models, model)
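The bagging loop above relies only on the MLUtils.jl observation-access functions `numobs` and `getobs`. A self-contained illustration with made-up arrays, observations along the last dimension per the MLUtils convention:

```julia
using MLUtils

A = rand(3, 100)   # 3 features × 100 observations
y = rand(100)      # 100 targets

data = (A, y)
N = MLUtils.numobs(data)                    # 100, the common number of observations
bag = rand(1:N, N)                          # a bootstrap sample of indices (with replacement)
A_sub, y_sub = MLUtils.getobs(data, bag)    # resamples both components consistently
size(A_sub), length(y_sub)                  # ((3, 100), 100)
```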
@@ -91,21 +84,11 @@ function LearnAPI.fit(algorithm::RidgeEnsemble, data::RidgeFitObs; verbosity=1)

 end

-# ... and so need a `fit` for unprocessed `data = (X, y)`:
-LearnAPI.fit(algorithm::RidgeEnsemble, data; kwargs...) =
-    fit(algorithm, obs(algorithm, data); kwargs...)
-
 # If `n` is increased, this `update` adds new regressors to the ensemble, including any
 # new hyperparameter updates (e.g., `lambda`) when computing the new
 # regressors. Otherwise, update is equivalent to retraining from scratch, with the
 # provided hyperparameter updates.
-function LearnAPI.update(
-    model::RidgeEnsembleFitted,
-    data::RidgeFitObs;
-    verbosity=1,
-    replacements...,
-)
-
+function LearnAPI.update(model::RidgeEnsembleFitted, data; verbosity=1, replacements...)
     :n in keys(replacements) || return fit(model, data)

     algorithm_old = LearnAPI.algorithm(model)
algorithm_old = LearnAPI.algorithm(model)
@@ -114,24 +97,18 @@ function LearnAPI.update(
11497
Δn = n - algorithm_old.n
11598
n < 0 && return fit(model, algorithm)
11699

117-
# get number of observations:
118-
N = MLUtils.numobs(data)
100+
atom = Ridge(; lambda=algorithm.lambda)
101+
observations = obs(atom, data)
102+
N = MLUtils.numobs(observations)
119103

120104
# initialize:
121105
models = model.models
122106
rng = model.rng # as mutated in previous `fit`/`update` calls
123107

124-
atom = Ridge(; lambda=algorithm.lambda)
125-
126-
rng2 = StableRNG(123)
127-
for _ in 1:10
128-
rand(rng2)
129-
end
130-
131108
# add new regressors to the ensemble:
132109
for _ in 1:Δn
133110
bag = rand(rng, 1:N, N)
134-
data_subset = MLUtils.getobs(data, bag)
111+
data_subset = MLUtils.getobs(observations, bag)
135112
model = fit(atom, data_subset; verbosity=verbosity-1)
136113
push!(models, model)
137114
end
@@ -142,13 +119,6 @@
     return RidgeEnsembleFitted(algorithm, atom, rng, models)
 end

-# an `update` for unprocessed `data = (X, y)`:
-LearnAPI.update(model::RidgeEnsembleFitted, data; kwargs...) =
-    update(model, obs(LearnAPI.algorithm(model), data); kwargs...)
-
-# `data` here can be pre-processed or not, because we're just calling the atomic
-# `predict`, which already has a data interface, and we don't need any subsampling, like
-# we did for `fit`:
 LearnAPI.predict(model::RidgeEnsembleFitted, ::Point, data) =
     mean(model.models) do atomic_model
         predict(atomic_model, Point(), data)
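The `mean(model.models) do ... end` idiom above averages the atomic prediction vectors elementwise. A standalone illustration, with made-up vectors standing in for atomic model output:

```julia
using Statistics

# three stand-in "atomic" predictions for the same two observations:
predictions = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]

# `mean(f, itr)` applies `f` to each element and averages the results; for vectors the
# average is elementwise, which is what the ensemble `predict` exploits:
mean(predictions) do p
    p   # in the real code this is `predict(atomic_model, Point(), data)`
end
# returns [3.0, 4.0]
```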
@@ -221,115 +191,6 @@ Xtest = Tables.subset(X, test)
 model = fit(LearnAPI.clone(algorithm; n=7), Xtrain, y[train]; verbosity=0);
 @test ŷ7 ≈ predict(model, Xtest)

-
-update(model, Xtest;
-fitobs = LearnAPI.obs(algorithm, data)
-predictobs = LearnAPI.obs(model, X)
-model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0)
-@test LearnAPI.target(algorithm, fitobs) == y
-@test predict(model, Point(), MLUtils.getobs(predictobs, test))
-@test predict(model, LearnAPI.features(algorithm, fitobs)) ≈ predict(model, X)
-
-@test LearnAPI.feature_importances(model) isa Vector{<:Pair{Symbol}}
-
-filename = tempname()
-using Serialization
-small_model = minimize(model)
-serialize(filename, small_model)
-
-recovered_model = deserialize(filename)
-@test LearnAPI.algorithm(recovered_model) == algorithm
-@test predict(
-    recovered_model,
-    Point(),
-    MLUtils.getobs(predictobs, test)
-)
-
-end
-
-# # VARIATION OF RIDGE REGRESSION THAT USES FALLBACK OF LearnAPI.obs
-
-# no docstring here - that goes with the constructor
-struct BabyRidge
-    lambda::Float64
-end
-
-"""
-    BabyRidge(; lambda=0.1)
-
-Instantiate a ridge regression algorithm, with regularization of `lambda`.
-
-"""
-BabyRidge(; lambda=0.1) = BabyRidge(lambda) # LearnAPI.constructor defined later
-
-struct BabyRidgeFitted{T,F}
-    algorithm::BabyRidge
-    coefficients::Vector{T}
-    feature_importances::F
-end
-
-function LearnAPI.fit(algorithm::BabyRidge, data; verbosity=1)
-
-    X, y = data
-
-    lambda = algorithm.lambda
-    table = Tables.columntable(X)
-    names = Tables.columnnames(table) |> collect
-    A = Tables.matrix(table)'
-
-    # apply core algorithm:
-    coefficients = (A*A' + algorithm.lambda*I)\(A*y) # vector
-
-    feature_importances = nothing
-
-    return BabyRidgeFitted(algorithm, coefficients, feature_importances)
-
-end
-
-# extracting stuff from training data:
-LearnAPI.target(::BabyRidge, data) = last(data)
-
-LearnAPI.algorithm(model::BabyRidgeFitted) = model.algorithm
-
-LearnAPI.predict(model::BabyRidgeFitted, ::Point, Xnew) =
-    Tables.matrix(Xnew)*model.coefficients
-
-LearnAPI.minimize(model::BabyRidgeFitted) =
-    BabyRidgeFitted(model.algorithm, model.coefficients, nothing)
-
-@trait(
-    BabyRidge,
-    constructor = BabyRidge,
-    kinds_of_proxy = (Point(),),
-    tags = ("regression",),
-    functions = (
-        :(LearnAPI.fit),
-        :(LearnAPI.algorithm),
-        :(LearnAPI.minimize),
-        :(LearnAPI.obs),
-        :(LearnAPI.features),
-        :(LearnAPI.target),
-        :(LearnAPI.predict),
-        :(LearnAPI.feature_importances),
-    )
-)
-
-@testset "test a variation which does not overload LearnAPI.obs" begin
-    algorithm = BabyRidge(lambda=0.5)
-    @test
-
-    model = fit(algorithm, Tables.subset(X, train), y[train]; verbosity=0)
-    ŷ = predict(model, Point(), Tables.subset(X, test))
-    @test ŷ isa Vector{Float64}
-
-    fitobs = obs(algorithm, data)
-    predictobs = LearnAPI.obs(model, X)
-    model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0)
-    @test predict(model, Point(), MLUtils.getobs(predictobs, test)) ==
-        predict(model, MLUtils.getobs(predictobs, test))
-    @test LearnAPI.target(algorithm, data) == y
-    @test LearnAPI.predict(model, X) ≈
-        LearnAPI.predict(model, LearnAPI.features(algorithm, data))
 end

 true

test/runtests.jl

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@ test_files = [
     "clone.jl",
     "integration/regression.jl",
     "integration/static_algorithms.jl",
+    "integration/iterative_algorithms.jl",
 ]

 files = isempty(ARGS) ? test_files : ARGS
