diff --git a/Project.toml b/Project.toml index 2d23d7e2..7f9d9267 100644 --- a/Project.toml +++ b/Project.toml @@ -3,9 +3,6 @@ uuid = "92ad9a40-7767-427a-9ee6-6e577f1266cb" authors = ["Anthony D. Blaom "] version = "0.1.0" -[deps] -InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" - [compat] julia = "1.6" diff --git a/docs/make.jl b/docs/make.jl index 77405bc2..86525b98 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -21,7 +21,7 @@ makedocs( "target/weights/features" => "target_weights_features.md", "obs" => "obs.md", "Accessor Functions" => "accessor_functions.md", - "Algorithm Traits" => "traits.md", + "Learner Traits" => "traits.md", ], "Common Implementation Patterns" => "common_implementation_patterns.md", "Testing an Implementation" => "testing_an_implementation.md", diff --git a/docs/src/accessor_functions.md b/docs/src/accessor_functions.md index 68adab31..cba1a91c 100644 --- a/docs/src/accessor_functions.md +++ b/docs/src/accessor_functions.md @@ -1,10 +1,10 @@ # [Accessor Functions](@id accessor_functions) The sole argument of an accessor function is the output, `model`, of -[`fit`](@ref). Algorithms are free to implement any number of these, or none of them. Only +[`fit`](@ref). Learners are free to implement any number of these, or none of them. Only `LearnAPI.strip` has a fallback, namely the identity. -- [`LearnAPI.algorithm(model)`](@ref) +- [`LearnAPI.learner(model)`](@ref) - [`LearnAPI.extras(model)`](@ref) - [`LearnAPI.strip(model)`](@ref) - [`LearnAPI.coefficients(model)`](@ref) @@ -18,12 +18,12 @@ The sole argument of an accessor function is the output, `model`, of - [`LearnAPI.training_scores(model)`](@ref) - [`LearnAPI.components(model)`](@ref) -Algorithm-specific accessor functions may also be implemented. The names of all accessor -functions are included in the list returned by [`LearnAPI.functions(algorithm)`](@ref). +Learner-specific accessor functions may also be implemented. The names of all accessor +functions are included in the list returned by [`LearnAPI.functions(learner)`](@ref). ## Implementation guide -All new implementations must implement [`LearnAPI.algorithm`](@ref). While, all others are +All new implementations must implement [`LearnAPI.learner`](@ref). While, all others are optional, any implemented accessor functions must be added to the list returned by [`LearnAPI.functions`](@ref). @@ -31,7 +31,7 @@ optional, any implemented accessor functions must be added to the list returned ## Reference ```@docs -LearnAPI.algorithm +LearnAPI.learner LearnAPI.extras LearnAPI.strip LearnAPI.coefficients diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md index 319e98ed..4c36ae2d 100644 --- a/docs/src/anatomy_of_an_implementation.md +++ b/docs/src/anatomy_of_an_implementation.md @@ -8,10 +8,12 @@ refer to the [demonstration](@ref workflow) of the implementation given later. The core LearnAPI.jl pattern looks like this: ```julia -model = fit(algorithm, data) +model = fit(learner, data) predict(model, newdata) ``` +Here `learner` specifies hyperparameters, while `model` stores learned parameters and any byproducts of algorithm execution. + A transformer ordinarily implements `transform` instead of `predict`. For more on `predict` versus `transform`, see [Predict or transform?](@ref) @@ -19,8 +21,8 @@ A transformer ordinarily implements `transform` instead of `predict`. For more o New implementations of `fit`, `predict`, etc, always have a *single* `data` argument as above. 
- For convenience, a signature such as `fit(algorithm, X, y)`, calling - `fit(algorithm, (X, y))`, can be added, but the LearnAPI.jl specification is + For convenience, a signature such as `fit(learner, X, y)`, calling + `fit(learner, (X, y))`, can be added, but the LearnAPI.jl specification is silent on the meaning or existence of signatures with extra arguments. !!! note @@ -28,7 +30,8 @@ A transformer ordinarily implements `transform` instead of `predict`. For more o If the `data` object consumed by `fit`, `predict`, or `transform` is not not a suitable table¹, array³, tuple of tables and arrays, or some other object implementing - the MLUtils.jl `getobs`/`numobs` interface, + the [MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) + `getobs`/`numobs` interface, then an implementation must: (i) overload [`obs`](@ref) to articulate how provided data can be transformed into a form that does support this interface, as illustrated below under @@ -46,9 +49,9 @@ using LinearAlgebra, Tables nothing # hide ``` -## Defining algorithms +## Defining learners -Here's a new type whose instances specify ridge regression parameters: +Here's a new type whose instances specify ridge regression hyperparameters: ```@example anatomy struct Ridge{T<:Real} @@ -57,26 +60,26 @@ end nothing # hide ``` -Instances of `Ridge` will be [algorithms](@ref algorithms), in LearnAPI.jl parlance. +Instances of `Ridge` are *[learners](@ref learners)*, in LearnAPI.jl parlance. -Associated with each new type of LearnAPI.jl [algorithm](@ref algorithms) will be a keyword -argument constructor, providing default values for all properties (struct fields) that are -not other algorithms, and we must implement [`LearnAPI.constructor(algorithm)`](@ref), for -recovering the constructor from an instance: +Associated with each new type of LearnAPI.jl [learner](@ref learners) will be a keyword +argument constructor, providing default values for all properties (typically, struct +fields) that are not other learners, and we must implement +[`LearnAPI.constructor(learner)`](@ref), for recovering the constructor from an instance: ```@example anatomy """ Ridge(; lambda=0.1) -Instantiate a ridge regression algorithm, with regularization of `lambda`. +Instantiate a ridge regression learner, with regularization of `lambda`. """ Ridge(; lambda=0.1) = Ridge(lambda) LearnAPI.constructor(::Ridge) = Ridge nothing # hide ``` -For example, in this case, if `algorithm = Ridge(0.2)`, then -`LearnAPI.constructor(algorithm)(lambda=0.2) == algorithm` is true. Note that we attach +For example, in this case, if `learner = Ridge(0.2)`, then +`LearnAPI.constructor(learner)(lambda=0.2) == learner` is true. Note that we attach the docstring to the *constructor*, not the struct. @@ -90,20 +93,20 @@ coefficients labelled by feature name for inspection after training: ```@example anatomy struct RidgeFitted{T,F} - algorithm::Ridge + learner::Ridge coefficients::Vector{T} named_coefficients::F end nothing # hide ``` -Note that we also include `algorithm` in the struct, for it must be possible to recover -`algorithm` from the output of `fit`; see [Accessor functions](@ref) below. +Note that we also include `learner` in the struct, for it must be possible to recover +`learner` from the output of `fit`; see [Accessor functions](@ref) below. 
The core implementation of `fit` looks like this: ```@example anatomy -function LearnAPI.fit(algorithm::Ridge, data; verbosity=1) +function LearnAPI.fit(learner::Ridge, data; verbosity=1) X, y = data @@ -112,10 +115,10 @@ function LearnAPI.fit(algorithm::Ridge, data; verbosity=1) names = Tables.columnnames(table) |> collect A = Tables.matrix(table, transpose=true) - lambda = algorithm.lambda + lambda = learner.lambda # apply core algorithm: - coefficients = (A*A' + algorithm.lambda*I)\(A*y) # vector + coefficients = (A*A' + learner.lambda*I)\(A*y) # vector # determine named coefficients: named_coefficients = [names[j] => coefficients[j] for j in eachindex(names)] @@ -123,7 +126,7 @@ function LearnAPI.fit(algorithm::Ridge, data; verbosity=1) # make some noise, if allowed: verbosity > 0 && @info "Coefficients: $named_coefficients" - return RidgeFitted(algorithm, coefficients, named_coefficients) + return RidgeFitted(learner, coefficients, named_coefficients) end ``` @@ -149,34 +152,34 @@ LearnAPI.predict(model::RidgeFitted, ::Point, Xnew) = ``` If the kind of proxy is omitted, as in `predict(model, Xnew)`, then a fallback grabs the -first element of the tuple returned by [`LearnAPI.kinds_of_proxy(algorithm)`](@ref), which +first element of the tuple returned by [`LearnAPI.kinds_of_proxy(learner)`](@ref), which we overload appropriately below. ## Extracting the target from training data The `fit` method consumes data which includes a [target variable](@ref proxy), i.e., the -algorithm is a supervised learner. We must therefore declare how the target variable can be extracted +learner is a supervised learner. We must therefore declare how the target variable can be extracted from training data, by implementing [`LearnAPI.target`](@ref): ```@example anatomy -LearnAPI.target(algorithm, data) = last(data) +LearnAPI.target(learner, data) = last(data) ``` There is a similar method, [`LearnAPI.features`](@ref) for declaring how training features -can be extracted (for passing to `predict`, for example) but this method has a fallback -which typically suffices: return `first(data)` if `data` is a tuple, and otherwise return -`data`. +can be extracted (something that can be passed to `predict`) but this method has a +fallback which suffices here: it returns `first(data)` if `data` is a tuple, and `data` +otherwise. ## Accessor functions An [accessor function](@ref accessor_functions) has the output of [`fit`](@ref) as it's sole argument. Every new implementation must implement the accessor function -[`LearnAPI.algorithm`](@ref) for recovering an algorithm from a fitted object: +[`LearnAPI.learner`](@ref) for recovering a learner from a fitted object: ```@example anatomy -LearnAPI.algorithm(model::RidgeFitted) = model.algorithm +LearnAPI.learner(model::RidgeFitted) = model.learner ``` Other accessor functions extract learned parameters or some standard byproducts of @@ -195,17 +198,17 @@ dump the named version of the coefficients: ```@example anatomy LearnAPI.strip(model::RidgeFitted) = - RidgeFitted(model.algorithm, model.coefficients, nothing) + RidgeFitted(model.learner, model.coefficients, nothing) ``` Crucially, we can still use `LearnAPI.strip(model)` in place of `model` to make new predictions. -## Algorithm traits +## Learner traits -Algorithm [traits](@ref traits) record extra generic information about an algorithm, or -make specific promises of behavior. 
They are methods that have an algorithm as the sole
+Learner [traits](@ref traits) record extra generic information about a learner, or
+make specific promises of behavior. They are methods that have a learner as the sole
 argument, and so we regard [`LearnAPI.constructor`](@ref) defined above as a trait.
 
 Because we have implemented `predict`, we are required to overload the
@@ -226,7 +229,7 @@ A macro provides a shortcut, convenient when multiple traits are to be defined:
     tags = (:regression,),
     functions = (
         :(LearnAPI.fit),
-        :(LearnAPI.algorithm),
+        :(LearnAPI.learner),
         :(LearnAPI.strip),
         :(LearnAPI.obs),
         :(LearnAPI.features),
@@ -239,15 +242,15 @@ nothing # hide
 ```
 
 The last trait, `functions`, returns a list of all LearnAPI.jl methods that can be
-meaninfully applied to the algorithm or associated model. See [`LearnAPI.functions`](@ref)
+meaningfully applied to the learner or associated model. See [`LearnAPI.functions`](@ref)
 for a checklist. [`LearnAPI.functions`](@ref) and [`LearnAPI.constructor`](@ref), are the
 only universally compulsory traits. However, it is worthwhile studying the [list of all
 traits](@ref traits_list) to see which might apply to a new implementation, to enable
 maximum buy into functionality provided by third party packages, and to assist third
 party algorithms that match machine learning algorithms to user-defined tasks.
 
-Note that we know `Ridge` instances are supervised algorithms because `:(LearnAPI.target)
-in LearnAPI.functions(algorithm)`, for every instance `algorithm`. With [some
+Note that we know `Ridge` instances are supervised learners because `:(LearnAPI.target)
+in LearnAPI.functions(learner)`, for every instance `learner`. With [some
 exceptions](@ref trait_contract), the value of a trait should depend only on the *type* of
 the argument.
 
@@ -257,7 +260,7 @@ We add one `fit` signature for user-convenience only. The LearnAPI.jl specificat
 nothing to say about `fit` signatures with more than two positional arguments.
 
 ```@example anatomy
-LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) = fit(algorithm, (X, y); kwargs...)
+LearnAPI.fit(learner::Ridge, X, y; kwargs...) = fit(learner, (X, y); kwargs...)
``` ## [Demonstration](@id workflow) @@ -277,8 +280,8 @@ nothing # hide ``` ```@example anatomy -algorithm = Ridge(lambda=0.5) -foreach(println, LearnAPI.functions(algorithm)) +learner = Ridge(lambda=0.5) +foreach(println, LearnAPI.functions(learner)) ``` Training and predicting: @@ -286,7 +289,7 @@ Training and predicting: ```@example anatomy Xtrain = Tables.subset(X, train) ytrain = y[train] -model = fit(algorithm, (Xtrain, ytrain)) # `fit(algorithm, Xtrain, ytrain)` will also work +model = fit(learner, (Xtrain, ytrain)) # `fit(learner, Xtrain, ytrain)` will also work ŷ = predict(model, Tables.subset(X, test)) ``` @@ -307,7 +310,7 @@ serialize(filename, small_model) ```julia recovered_model = deserialize(filename) -@assert LearnAPI.algorithm(recovered_model) == algorithm +@assert LearnAPI.learner(recovered_model) == learner @assert predict(recovered_model, X) == predict(model, X) ``` @@ -324,15 +327,15 @@ end Ridge(; lambda=0.1) = Ridge(lambda) struct RidgeFitted{T,F} - algorithm::Ridge + learner::Ridge coefficients::Vector{T} named_coefficients::F end -LearnAPI.algorithm(model::RidgeFitted) = model.algorithm +LearnAPI.learner(model::RidgeFitted) = model.learner LearnAPI.coefficients(model::RidgeFitted) = model.named_coefficients LearnAPI.strip(model::RidgeFitted) = - RidgeFitted(model.algorithm, model.coefficients, nothing) + RidgeFitted(model.learner, model.coefficients, nothing) @trait( Ridge, @@ -341,7 +344,7 @@ LearnAPI.strip(model::RidgeFitted) = tags = (:regression,), functions = ( :(LearnAPI.fit), - :(LearnAPI.algorithm), + :(LearnAPI.learner), :(LearnAPI.strip), :(LearnAPI.obs), :(LearnAPI.features), @@ -390,16 +393,16 @@ methods - one to handle "regular" input, and one to handle the pre-processed dat (observations) which appears first below: ```@example anatomy2 -function LearnAPI.fit(algorithm::Ridge, observations::RidgeFitObs; verbosity=1) +function LearnAPI.fit(learner::Ridge, observations::RidgeFitObs; verbosity=1) - lambda = algorithm.lambda + lambda = learner.lambda A = observations.A names = observations.names y = observations.y - # apply core algorithm: - coefficients = (A*A' + algorithm.lambda*I)\(A*y) # 1 x p matrix + # apply core learner: + coefficients = (A*A' + learner.lambda*I)\(A*y) # 1 x p matrix # determine named coefficients: named_coefficients = [names[j] => coefficients[j] for j in eachindex(names)] @@ -407,19 +410,19 @@ function LearnAPI.fit(algorithm::Ridge, observations::RidgeFitObs; verbosity=1) # make some noise, if allowed: verbosity > 0 && @info "Coefficients: $named_coefficients" - return RidgeFitted(algorithm, coefficients, named_coefficients) + return RidgeFitted(learner, coefficients, named_coefficients) end -LearnAPI.fit(algorithm::Ridge, data; kwargs...) = - fit(algorithm, obs(algorithm, data); kwargs...) +LearnAPI.fit(learner::Ridge, data; kwargs...) = + fit(learner, obs(learner, data); kwargs...) ``` ### The `obs` contract Providing `fit` signatures matching the output of `obs`, is the first part of the `obs` contract. The second part is this: *The output of `obs` must implement the interface -specified by the trait* [`LearnAPI.data_interface(algorithm)`](@ref). Assuming this is +specified by the trait* [`LearnAPI.data_interface(learner)`](@ref). Assuming this is [`LearnAPI.RandomAccess()`](@ref) (the default) it usually suffices to overload `Base.getindex` and `Base.length`: @@ -462,7 +465,7 @@ as the fallback mentioned above is no longer adequate. 
### Important notes: -- The observations to be consumed by `fit` are returned by `obs(algorithm::Ridge, ...)`, +- The observations to be consumed by `fit` are returned by `obs(learner::Ridge, ...)`, while those consumed by `predict` are returned by `obs(model::RidgeFitted, ...)`. We need the different signatures because the form of data consumed by `fit` and `predict` are generally different. @@ -477,7 +480,7 @@ argument, overloading `obs` is optional. This is provided data in publicized [`LearnAPI.RandomAccess`](@ref) interface (most tables¹, arrays³, and tuples thereof). To opt out of supporting the MLUtils.jl interface altogether, an implementation must -overload the trait, [`LearnAPI.data_interface(algorithm)`](@ref). See [Data +overload the trait, [`LearnAPI.data_interface(learner)`](@ref). See [Data interfaces](@ref data_interfaces) for details. @@ -486,7 +489,7 @@ interfaces](@ref data_interfaces) for details. As above, we add a signature which plays no role vis-à-vis LearnAPI.jl. ```@example anatomy2 -LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) = fit(algorithm, (X, y); kwargs...) +LearnAPI.fit(learner::Ridge, X, y; kwargs...) = fit(learner, (X, y); kwargs...) ``` ## Demonstration of an advanced `obs` workflow @@ -496,9 +499,9 @@ generic MLUtils.jl interface: ```@example anatomy2 import MLUtils -algorithm = Ridge() -observations_for_fit = obs(algorithm, (X, y)) -model = fit(algorithm, MLUtils.getobs(observations_for_fit, train)) +learner = Ridge() +observations_for_fit = obs(learner, (X, y)) +model = fit(learner, MLUtils.getobs(observations_for_fit, train)) observations_for_predict = obs(model, X) ẑ = predict(model, MLUtils.getobs(observations_for_predict, test)) ``` diff --git a/docs/src/common_implementation_patterns.md b/docs/src/common_implementation_patterns.md index c554ca45..9b128c6a 100644 --- a/docs/src/common_implementation_patterns.md +++ b/docs/src/common_implementation_patterns.md @@ -1,8 +1,6 @@ -# Common Implementation Patterns +# [Common Implementation Patterns](@id patterns) -!!! warning - -!!! warning +!!! important This section is only an implementation guide. The definitive specification of the Learn API is given in [Reference](@ref reference). diff --git a/docs/src/fit_update.md b/docs/src/fit_update.md index a0486bdb..29f7af01 100644 --- a/docs/src/fit_update.md +++ b/docs/src/fit_update.md @@ -3,12 +3,12 @@ ### Training ```julia -fit(algorithm, data; verbosity=1) -> model -fit(algorithm; verbosity=1) -> static_model +fit(learner, data; verbosity=1) -> model +fit(learner; verbosity=1) -> static_model ``` A "static" algorithm is one that does not generalize to new observations (e.g., some -clustering algorithms); there is no trainiing data and the algorithm is executed by +clustering algorithms); there is no training data and the algorithm is executed by `predict` or `transform` which receive the data. See example below. @@ -20,17 +20,15 @@ update_observations(model, new_data; verbosity=1, param1=new_value1, ...) -> upd update_features(model, new_data; verbosity=1, param1=new_value1, ...) -> updated_model ``` -Data slurping forms are similarly provided for updating methods. 
- ## Typical workflows ### Supervised models -Supposing `Algorithm` is some supervised classifier type, with an iteration parameter `n`: +Supposing `Learner` is some supervised classifier type, with an iteration parameter `n`: ```julia -algorithm = Algorithm(n=100) -model = fit(algorithm, (X, y)) +learner = Learner(n=100) +model = fit(learner, (X, y)) # Predict probability distributions: ŷ = predict(model, Distribution(), Xnew) @@ -47,30 +45,30 @@ See also [Classification](@ref) and [Regression](@ref). ### Transformers -A dimension-reducing transformer, `algorithm` might be used in this way: +A dimension-reducing transformer, `learner` might be used in this way: ```julia -model = fit(algorithm, X) +model = fit(learner, X) transform(model, X) # or `transform(model, Xnew)` ``` or, if implemented, using a single call: ```julia -transform(algorithm, X) # `fit` implied +transform(learner, X) # `fit` implied ``` ### [Static algorithms (no "learning")](@id static_algorithms) -Suppose `algorithm` is some clustering algorithm that cannot be generalized to new data +Suppose `learner` is some clustering algorithm that cannot be generalized to new data (e.g. DBSCAN): ```julia -model = fit(algorithm) # no training data +model = fit(learner) # no training data labels = predict(model, X) # may mutate `model` # Or, in one line: -labels = predict(algorithm, X) +labels = predict(learner, X) # But two-line version exposes byproducts of the clustering algorithm (e.g., outliers): LearnAPI.extras(model) @@ -84,14 +82,14 @@ In density estimation, `fit` consumes no features, only a target variable; `pred which consumes no data, returns the learned density: ```julia -model = fit(algorithm, y) # no features +model = fit(learner, y) # no features predict(model) # shortcut for `predict(model, SingleDistribution())`, or similar ``` A one-liner will typically be implemented as well: ```julia -predict(algorithm, y) +predict(learner, y) ``` See also [Density Estimation](@ref). @@ -101,10 +99,12 @@ See also [Density Estimation](@ref). ### Training -| method | fallback | compulsory? | -|:-------------------------------------------------------------------------------|:-----------------------------------------------------------------|--------------------| -| [`fit`](@ref)`(algorithm, data; verbosity=1)` | ignores `data` and applies signature below | yes, unless static | -| [`fit`](@ref)`(algorithm; verbosity=1)` | none | no, unless static | +Exactly one of the following must be implemented: + +| method | fallback | +|:--------------------------------------------|:---------| +| [`fit`](@ref)`(learner, data; verbosity=1)` | none | +| [`fit`](@ref)`(learner; verbosity=1)` | none | ### Updating @@ -114,7 +114,7 @@ See also [Density Estimation](@ref). | [`update_observations`](@ref)`(model, data; verbosity=1, hyperparameter_updates...)` | none | no | | [`update_features`](@ref)`(model, data; verbosity=1, hyperparameter_updates...)` | none | no | -There are some contracts regarding the behaviour of the update methods, as they relate to +There are some contracts governing the behaviour of the update methods, as they relate to a previous `fit` call. Consult the document strings for details. 
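For example, if `update` is implemented for an iteration parameter, a typical interaction looks like this (an illustrative sketch only: `Learner` and its parameter `n` are hypothetical, and the exact warm-restart behaviour is learner-specific, as set out in the `update` document string):

```julia
learner = Learner(n=100)            # `n` is an iteration parameter
model = fit(learner, data)

# typically adds 50 iterations to the existing model, rather than
# retraining from scratch:
model = update(model, data; n=150)
```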
## Reference diff --git a/docs/src/index.md b/docs/src/index.md index 713932cf..727199ff 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -11,13 +11,13 @@ A base Julia interface for machine learning and statistics LearnAPI.jl is a lightweight, functional-style interface, providing a collection of [methods](@ref Methods), such as `fit` and `predict`, to be implemented by algorithms from -machine learning and statistics. Its careful design ensures algorithms implementing -LearnAPI.jl can buy into functionality, such as external performance estimates, -hyperparameter optimization and model composition, provided by ML/statistics toolboxes and -other packages. LearnAPI.jl includes a number of Julia [traits](@ref traits) for promising -specific behavior. +machine learning and statistics, some examples of which are listed [here](@ref +patterns). A careful design ensures algorithms implementing LearnAPI.jl can buy into +functionality, such as external performance estimates, hyperparameter optimization and +model composition, provided by ML/statistics toolboxes and other packages. LearnAPI.jl +includes a number of Julia [traits](@ref traits) for promising specific behavior. -LearnAPI.jl's only dependency is the standard library `InteractiveUtils`. +LearnAPI.jl's has no package dependencies. ```@raw html 🚧 @@ -63,9 +63,9 @@ LearnAPI.feature_importances(model) small_model = LearnAPI.strip(model) serialize("my_random_forest.jls", small_model) -# Recover saved model and algorithm configuration: +# Recover saved model and algorithm configuration ("learner"): recovered_model = deserialize("my_random_forest.jls") -@assert LearnAPI.algorithm(recovered_model) == forest +@assert LearnAPI.learner(recovered_model) == forest @assert predict(recovered_model, Point(), Xnew) == ŷ ``` @@ -73,7 +73,7 @@ recovered_model = deserialize("my_random_forest.jls") dispatch based on the [kind of target proxy](@ref proxy), a key LearnAPI.jl concept. LearnAPI.jl places more emphasis on the notion of target variables and target proxies than on the usual supervised/unsupervised learning dichotomy. From this point of view, a -supervised algorithm is simply one in which a target variable exists, and happens to +supervised learner is simply one in which a target variable exists, and happens to appear as an input to training but not to prediction. ## Data interfaces @@ -99,7 +99,7 @@ loaders reading images from disk). - [Reference](@ref reference): official specification -- [Common Implementation Patterns](@ref): implementation suggestions for common, +- [Common Implementation Patterns](@ref patterns): implementation suggestions for common, informally defined, algorithm types - [Testing an Implementation](@ref) diff --git a/docs/src/obs.md b/docs/src/obs.md index 5818ea76..3d206b70 100644 --- a/docs/src/obs.md +++ b/docs/src/obs.md @@ -1,38 +1,38 @@ # [`obs` and Data Interfaces](@id data_interface) The `obs` method takes data intended as input to `fit`, `predict` or `transform`, and -transforms it to an algorithm-specific form guaranteed to implement a form of observation -access designated by the algorithm. The transformed data can then be resampled and passed +transforms it to a learner-specific form guaranteed to implement a form of observation +access designated by the learner. The transformed data can then be resampled and passed on to the relevant method in place of the original input. Using `obs` may provide performance advantages over naive workflows in some cases (e.g., cross-validation). 
```julia -obs(algorithm, data) # can be passed to `fit` instead of `data` +obs(learner, data) # can be passed to `fit` instead of `data` obs(model, data) # can be passed to `predict` or `transform` instead of `data` ``` ## [Typical workflows](@id obs_workflows) LearnAPI.jl makes no universal assumptions about the form of `data` in a call -like `fit(algorithm, data)`. However, if we define +like `fit(learner, data)`. However, if we define ```julia -observations = obs(algorithm, data) +observations = obs(learner, data) ``` -then, assuming the typical case that `LearnAPI.data_interface(algorithm) == +then, assuming the typical case that `LearnAPI.data_interface(learner) == LearnAPI.RandomAccess()`, `observations` implements the [MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs`/`numobs` interface, for grabbing and counting observations. Moreover, we can pass `observations` to `fit` in place of the original data, or first resample it using `MLUtils.getobs`: ```julia -# equivalent to `model = fit(algorithm, data)` -model = fit(algorithm, observations) +# equivalent to `model = fit(learner, data)` +model = fit(learner, observations) # with resampling: resampled_observations = MLUtils.getobs(observations, 1:10) -model = fit(algorithm, resampled_observations) +model = fit(learner, resampled_observations) ``` In some implementations, the alternative pattern above can be used to avoid repeating @@ -43,24 +43,24 @@ how a user might call `obs` and `MLUtils.getobs` to perform efficient cross-vali using LearnAPI import MLUtils -algorithm = +learner = data = -X = LearnAPI.features(algorithm, data) -y = LearnAPI.target(algorithm, data) +X = LearnAPI.features(learner, data) +y = LearnAPI.target(learner, data) train_test_folds = map([1:10, 11:20, 21:30]) do test (setdiff(1:30, test), test) end -fitobs = obs(algorithm, data) +fitobs = obs(learner, data) never_trained = true scores = map(train_test_folds) do (train, test) # train using model-specific representation of data: fitobs_subset = MLUtils.getobs(fitobs, train) - model = fit(algorithm, fitobs_subset) + model = fit(learner, fitobs_subset) # predict on the fold complement: if never_trained @@ -79,7 +79,7 @@ end | method | comment | compulsory? | fallback | |:-------------------------------|:------------------------------------|:-------------:|:---------------| -| [`obs(algorithm, data)`](@ref) | here `data` is `fit`-consumable | not typically | returns `data` | +| [`obs(learner, data)`](@ref) | here `data` is `fit`-consumable | not typically | returns `data` | | [`obs(model, data)`](@ref) | here `data` is `predict`-consumable | not typically | returns `data` | @@ -94,7 +94,7 @@ obs ### [Data interfaces](@id data_interfaces) -New implementations must overload [`LearnAPI.data_interface(algorithm)`](@ref) if the +New implementations must overload [`LearnAPI.data_interface(learner)`](@ref) if the output of [`obs`](@ref) does not implement [`LearnAPI.RandomAccess`](@ref). (Arrays, most tables, and all tuples thereof, implement `RandomAccess`.) diff --git a/docs/src/predict_transform.md b/docs/src/predict_transform.md index 605ee27a..a6a00047 100644 --- a/docs/src/predict_transform.md +++ b/docs/src/predict_transform.md @@ -6,15 +6,15 @@ transform(model, data) inverse_transform(model, data) ``` -Versions without the `data` argument may also appear, for example in [Density +Versions without the `data` argument may apply, for example in [Density estimation](@ref). 
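For instance, a density estimator consumes only a target variable in training, and its no-data `predict` call returns the fitted density (a sketch; the precise kind of proxy returned is learner-specific):

```julia
model = fit(learner, y)   # no features, only a target
d = predict(model)        # the learned density; no `data` argument
```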
## [Typical worklows](@id predict_workflow) -Train some supervised `algorithm`: +Train some supervised `learner`: ```julia -model = fit(algorithm, (X, y)) +model = fit(learner, (X, y)) ``` Predict probability distributions: @@ -29,10 +29,10 @@ Generate point predictions: ŷ = predict(model, Point(), Xnew) ``` -Train a dimension-reducing `algorithm`: +Train a dimension-reducing `learner`: ```julia -model = fit(algorithm, X) +model = fit(learner, X) Xnew_reduced = transform(model, Xnew) ``` @@ -42,11 +42,17 @@ Apply an approximate right inverse: inverse_transform(model, Xnew_reduced) ``` +Fit and transform in one line: + +```julia +transform(learner, data) # `fit` implied +``` + ### An advanced workflow ```julia -fitobs = obs(algorithm, (X, y)) # algorithm-specific repr. of data -model = fit(algorithm, MLUtils.getobs(fitobs, 1:100)) +fitobs = obs(learner, (X, y)) # learner-specific repr. of data +model = fit(learner, MLUtils.getobs(fitobs, 1:100)) predictobs = obs(model, MLUtils.getobs(X, 101:150)) ŷ = predict(model, Point(), predictobs) ``` @@ -62,37 +68,37 @@ ŷ = predict(model, Point(), predictobs) ### Predict or transform? -If the algorithm has a notion of [target variable](@ref proxy), then use +If the learner has a notion of [target variable](@ref proxy), then use [`predict`](@ref) to output each supported [kind of target proxy](@ref proxy_types) (`Point()`, `Distribution()`, etc). For output not associated with a target variable, implement [`transform`](@ref) instead, which does not dispatch on [`LearnAPI.KindOfProxy`](@ref), but can be optionally paired with an implementation of [`inverse_transform`](@ref), for returning (approximate) -right inverses to `transform`. +right or left inverses to `transform`. -Of course, the one algorithm can implement both a `predict` and `transform` method. For +Of course, the one learner can implement both a `predict` and `transform` method. For example a K-means clustering algorithm can `predict` labels and `transform` to reduce dimension using distances from the cluster centres. ### [One-liners combining fit and transform/predict](@id one_liners) -Algorithms may optionally overload `transform` to apply `fit` first, using the supplied +Learners may optionally overload `transform` to apply `fit` first, using the supplied data if required, and then immediately `transform` the same data. The same applies to -`predict`. In that case the first argument of `transform`/`predict` is an *algorithm* +`predict`. In that case the first argument of `transform`/`predict` is an *learner* instead of the output of `fit`: ```julia -predict(algorithm, kind_of_proxy, data) # `fit` implied -transform(algorithm, data) # `fit` implied +predict(learner, kind_of_proxy, data) # `fit` implied +transform(learner, data) # `fit` implied ``` -For example, if `fit(algorithm, X)` is defined, then `predict(algorithm, X)` will be +For example, if `fit(learner, X)` is defined, then `predict(learner, X)` will be shorthand for ```julia -model = fit(algorithm, X) +model = fit(learner, X) predict(model, X) ``` diff --git a/docs/src/reference.md b/docs/src/reference.md index 9c13ee79..c6e9aaf3 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -2,7 +2,7 @@ Here we give the definitive specification of the LearnAPI.jl interface. For informal guides see [Anatomy of an Implementation](@ref) and [Common Implementation -Patterns](@ref). +Patterns](@ref patterns). 
## [Important terms and concepts](@id scope) @@ -16,7 +16,7 @@ ML/statistical algorithms are typically applied in conjunction with resampling o *observations*, as in [cross-validation](https://en.wikipedia.org/wiki/Cross-validation_(statistics)). In this document *data* will always refer to objects encapsulating an ordered sequence of -individual observations. If an algorithm is trained using multiple data objects, it is +individual observations. If a learner is trained using multiple data objects, it is undertood that individual objects share the same number of observations, and that resampling of one component implies synchronized resampling of the others. @@ -29,9 +29,9 @@ see [`obs`](@ref) and [`LearnAPI.data_interface`](@ref) for details. !!! note - In the MLUtils.jl - convention, observations in tables are the rows but observations in a matrix are the - columns. + In the MLUtils.jl + convention, observations in tables are the rows but observations in a matrix are the + columns. ### [Hyperparameters](@id hyperparameters) @@ -70,67 +70,69 @@ dispatch. These are also used to distinguish performance metrics provided by the [StatisticalMeasures.jl](https://juliaai.github.io/StatisticalMeasures.jl/dev/). -### [Algorithms](@id algorithms) +### [Learners](@id learners) -An object implementing the LearnAPI.jl interface is called an *algorithm*, although it is -more accurately "the configuration of some algorithm".¹ An algorithm encapsulates a -particular set of user-specified [hyperparameters](@ref) as the object's *properties* -(which conceivably differ from its fields). It does not store learned parameters. +An object implementing the LearnAPI.jl interface is called a *learner*, although it is +more accurately "the configuration of some machine learning or statistical algorithm".¹ A +learner encapsulates a particular set of user-specified [hyperparameters](@ref) as the +object's *properties* (which conceivably differ from its fields). It does not store +learned parameters. Informally, we will sometimes use the word "model" to refer to the output of -`fit(algorithm, ...)` (see below), something which typically does store learned +`fit(learner, ...)` (see below), something which typically does *store* learned parameters. -For `algorithm` to be a valid LearnAPI.jl algorithm, -[`LearnAPI.constructor(algorithm)`](@ref) must be defined and return a keyword constructor -enabling recovery of `algorithm` from its properties: +For `learner` to be a valid LearnAPI.jl learner, +[`LearnAPI.constructor(learner)`](@ref) must be defined and return a keyword constructor +enabling recovery of `learner` from its properties: ```julia -properties = propertynames(algorithm) -named_properties = NamedTuple{properties}(getproperty.(Ref(algorithm), properties)) -@assert algorithm == LearnAPI.constructor(algorithm)(; named_properties...) +properties = propertynames(learner) +named_properties = NamedTuple{properties}(getproperty.(Ref(learner), properties)) +@assert learner == LearnAPI.constructor(learner)(; named_properties...) ``` -which can be tested with `@assert `[`LearnAPI.clone(algorithm)`](@ref)` == algorithm`. +which can be tested with `@assert `[`LearnAPI.clone(learner)`](@ref)` == learner`. -Note that if if `algorithm` is an instance of a *mutable* struct, this requirement +Note that if if `learner` is an instance of a *mutable* struct, this requirement generally requires overloading `Base.==` for the struct. -No LearnAPI.jl method is permitted to mutate an algorithm. 
In particular, one should make +No LearnAPI.jl method is permitted to mutate a learner. In particular, one should make deep copies of RNG hyperparameters before using them in a new implementation of [`fit`](@ref). -#### Composite algorithms (wrappers) +#### Composite learners (wrappers) -A *composite algorithm* is one with at least one property that can take other algorithms -as values; for such algorithms [`LearnAPI.is_composite`](@ref)`(algorithm)` must be `true` +A *composite learner* is one with at least one property that can take other learners as +values; for such learners [`LearnAPI.is_composite`](@ref)`(learner)` must be `true` (fallback is `false`). Generally, the keyword constructor provided by [`LearnAPI.constructor`](@ref) must provide default values for all properties that are not -algorithm-valued. Instead, these algorithm-valued properties can have a `nothing` default, -with the constructor throwing an error if the default value persists. +learner-valued. Instead, these learner-valued properties can have a `nothing` default, +with the constructor throwing an error if the the constructor call does not explicitly +specify a new value. -Any object `algorithm` for which [`LearnAPI.functions`](@ref)`(algorithm)` is non-empty is +Any object `learner` for which [`LearnAPI.functions(learner)`](@ref) is non-empty is understood to have a valid implementation of the LearnAPI.jl interface. #### Example -Any instance of `GradientRidgeRegressor` defined below is a valid algorithm. +Any instance of `GradientRidgeRegressor` defined below is a valid learner. ```julia struct GradientRidgeRegressor{T<:Real} - learning_rate::T - epochs::Int - l2_regularization::T + learning_rate::T + epochs::Int + l2_regularization::T end GradientRidgeRegressor(; learning_rate=0.01, epochs=10, l2_regularization=0.01) = - GradientRidgeRegressor(learning_rate, epochs, l2_regularization) + GradientRidgeRegressor(learning_rate, epochs, l2_regularization) LearnAPI.constructor(::GradientRidgeRegressor) = GradientRidgeRegressor ``` ## Documentation -Attach public LearnAPI.jl-related documentation for an algorithm to it's *constructor*, -rather than to the struct defining its type. In this way, an algorithm can implement +Attach public LearnAPI.jl-related documentation for a learner to it's *constructor*, +rather than to the struct defining its type. In this way, a learner can implement multiple interfaces, in addition to the LearnAPI interface, with separate document strings for each. @@ -138,20 +140,20 @@ for each. !!! note "Compulsory methods" - All new algorithm types must implement [`fit`](@ref), - [`LearnAPI.algorithm`](@ref), [`LearnAPI.constructor`](@ref) and - [`LearnAPI.functions`](@ref). + All new learner types must implement [`fit`](@ref), + [`LearnAPI.learner`](@ref), [`LearnAPI.constructor`](@ref) and + [`LearnAPI.functions`](@ref). -Most algorithms will also implement [`predict`](@ref) and/or [`transform`](@ref). For a -bare minimum implementation, see the implementation of `SmallAlgorithm` +Most learners will also implement [`predict`](@ref) and/or [`transform`](@ref). For a +bare minimum implementation, see the implementation of `SmallLearner` [here](https://github.com/JuliaAI/LearnAPI.jl/blob/dev/test/traits.jl). ### List of methods -- [`fit`](@ref fit): for training or updating algorithms that generalize to new data. 
Or, - for non-generalizing algorithms (see [here](@ref static_algorithms) and [Static - Algorithms](@ref)), for wrapping `algorithm` in a mutable struct that can be mutated by - `predict`/`transform` to record byproducts of those operations. +- [`fit`](@ref fit): for (i) training or updating learners that generalize to new data; or + (ii) wrapping `learner` in an object that is possibly mutated by `predict`/`transform`, + to record byproducts of those operations, in the special case of *non-generalizing* + learners (called here [static algorithms](@ref static_algorithms)) - [`update`](@ref fit): for updating learning outcomes after hyperparameter changes, such as increasing an iteration parameter. @@ -173,18 +175,18 @@ bare minimum implementation, see the implementation of `SmallAlgorithm` defined. - [`obs`](@ref data_interface): method for exposing to the user - algorithm-specific representations of data, which are additionally guaranteed to + learner-specific representations of data, which are additionally guaranteed to implement the observation access API specified by - [`LearnAPI.data_interface(algorithm)`](@ref). + [`LearnAPI.data_interface(learner)`](@ref). - [Accessor functions](@ref accessor_functions): these include functions like `LearnAPI.feature_importances` and `LearnAPI.training_losses`, for extracting, from - training outcomes, information common to many algorithms. This includes + training outcomes, information common to many learners. This includes [`LearnAPI.strip(model)`](@ref) for replacing a learning outcome `model` with a serializable version that can still `predict` or `transform`. -- [Algorithm traits](@ref traits): methods that promise specific algorithm behavior or - record general information about the algorithm. Only [`LearnAPI.constructor`](@ref) and +- [Learner traits](@ref traits): methods that promise specific learner behavior or + record general information about the learner. Only [`LearnAPI.constructor`](@ref) and [`LearnAPI.functions`](@ref) are universally compulsory. @@ -197,8 +199,8 @@ LearnAPI.@trait --- -¹ We acknowledge users may not like this terminology, and may know "algorithm" by some -other name, such as "strategy", "options", "hyperparameter set", "configuration", or -"model". Consensus on this point is difficult; see, e.g., +¹ We acknowledge users may not like this terminology, and may know "learner" by some other +name, such as "strategy", "options", "hyperparameter set", "configuration", "algorithm", +or "model". Consensus on this point is difficult; see, e.g., [this](https://discourse.julialang.org/t/ann-learnapi-jl-proposal-for-a-basement-level-machine-learning-api/93048/20) Julia Discourse discussion. diff --git a/docs/src/target_weights_features.md b/docs/src/target_weights_features.md index 910b9a4c..c54639a6 100644 --- a/docs/src/target_weights_features.md +++ b/docs/src/target_weights_features.md @@ -3,25 +3,25 @@ Methods for extracting parts of training data: ```julia -LearnAPI.target(algorithm, data) -> -LearnAPI.weights(algorithm, data) -> -LearnAPI.features(algorithm, data) -> +LearnAPI.target(learner, data) -> +LearnAPI.weights(learner, data) -> +LearnAPI.features(learner, data) -> ``` -Here `data` is something supported in a call of the form `fit(algorithm, data)`. +Here `data` is something supported in a call of the form `fit(learner, data)`. 
# Typical workflow Not typically appearing in a general user's workflow but useful in meta-alagorithms, such as cross-validation (see the example in [`obs` and Data Interfaces](@ref data_interface)). -Supposing `algorithm` is a supervised classifier predicting a one-dimensional vector +Supposing `learner` is a supervised classifier predicting a one-dimensional vector target: ```julia -model = fit(algorithm, data) -X = LearnAPI.features(algorithm, data) -y = LearnAPI.target(algorithm, data) +model = fit(learner, data) +X = LearnAPI.features(learner, data) +y = LearnAPI.target(learner, data) ŷ = predict(model, Point(), X) training_loss = sum(ŷ .!= y) ``` diff --git a/docs/src/traits.md b/docs/src/traits.md index cb03f03d..f47f1633 100644 --- a/docs/src/traits.md +++ b/docs/src/traits.md @@ -1,9 +1,9 @@ -# [Algorithm Traits](@id traits) +# [Learner Traits](@id traits) -Algorithm traits are simply functions whose sole argument is an algorithm. +Learner traits are simply functions whose sole argument is a learner. -Traits promise specific algorithm behavior, such as: *This algorithm can make point or -probabilistic predictions* or *This algorithm is supervised* (sees a target in +Traits promise specific learner behavior, such as: *This learner can make point or +probabilistic predictions* or *This learner is supervised* (sees a target in training). They may also record more mundane information, such as a package license. ## [Trait summary](@id trait_summary) @@ -15,46 +15,46 @@ In the examples column of the table below, `Continuous` is a name owned the pack | trait | return value | fallback value | example | |:-----------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------|:-----------------------------------------------------------| -| [`LearnAPI.constructor`](@ref)`(algorithm)` | constructor for generating new or modified versions of `algorithm` | (no fallback) | `RidgeRegressor` | -| [`LearnAPI.functions`](@ref)`(algorithm)` | functions you can apply to `algorithm` or associated model (traits excluded) | `()` | `(:fit, :predict, :LearnAPI.strip, :(LearnAPI.algorithm), :obs)` | -| [`LearnAPI.kinds_of_proxy`](@ref)`(algorithm)` | instances `kind` of `KindOfProxy` for which an implementation of `LearnAPI.predict(algorithm, kind, ...)` is guaranteed. 
| `()` | `(Distribution(), Interval())` | -| [`LearnAPI.tags`](@ref)`(algorithm)` | lists one or more suggestive algorithm tags from `LearnAPI.tags()` | `()` | (:regression, :probabilistic) | -| [`LearnAPI.is_pure_julia`](@ref)`(algorithm)` | `true` if implementation is 100% Julia code | `false` | `true` | -| [`LearnAPI.pkg_name`](@ref)`(algorithm)` | name of package providing core code (may be different from package providing LearnAPI.jl implementation) | `"unknown"` | `"DecisionTree"` | -| [`LearnAPI.pkg_license`](@ref)`(algorithm)` | name of license of package providing core code | `"unknown"` | `"MIT"` | -| [`LearnAPI.doc_url`](@ref)`(algorithm)` | url providing documentation of the core code | `"unknown"` | `"https://en.wikipedia.org/wiki/Decision_tree_learning"` | -| [`LearnAPI.load_path`](@ref)`(algorithm)` | string locating name returned by `LearnAPI.constructor(algorithm)`, beginning with a package name | "unknown"` | `FastTrees.LearnAPI.DecisionTreeClassifier` | -| [`LearnAPI.is_composite`](@ref)`(algorithm)` | `true` if one or more properties of `algorithm` may be an algorithm | `false` | `true` | -| [`LearnAPI.human_name`](@ref)`(algorithm)` | human name for the algorithm; should be a noun | type name with spaces | "elastic net regressor" | -| [`LearnAPI.iteration_parameter`](@ref)`(algorithm)` | symbolic name of an iteration parameter | `nothing` | :epochs | -| [`LearnAPI.data_interface`](@ref)`(algorithm)` | Interface implemented by objects returned by [`obs`](@ref) | `Base.HasLength()` (supports `MLUtils.getobs/numobs`) | `Base.SizeUnknown()` (supports `iterate`) | -| [`LearnAPI.fit_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractVector{Continuous}, Continuous}` | -| [`LearnAPI.target_observation_scitype`](@ref)`(algorithm)` | upper bound on the scitype of each observation of the targget | `Any` | `Continuous` | -| [`LearnAPI.is_static`](@ref)`(algorithm)` | `true` if `fit` consumes no data | `false` | `true` | +| [`LearnAPI.constructor`](@ref)`(learner)` | constructor for generating new or modified versions of `learner` | (no fallback) | `RidgeRegressor` | +| [`LearnAPI.functions`](@ref)`(learner)` | functions you can apply to `learner` or associated model (traits excluded) | `()` | `(:fit, :predict, :LearnAPI.strip, :(LearnAPI.learner), :obs)` | +| [`LearnAPI.kinds_of_proxy`](@ref)`(learner)` | instances `kind` of `KindOfProxy` for which an implementation of `LearnAPI.predict(learner, kind, ...)` is guaranteed. 
| `()` | `(Distribution(), Interval())` | +| [`LearnAPI.tags`](@ref)`(learner)` | lists one or more suggestive learner tags from `LearnAPI.tags()` | `()` | (:regression, :probabilistic) | +| [`LearnAPI.is_pure_julia`](@ref)`(learner)` | `true` if implementation is 100% Julia code | `false` | `true` | +| [`LearnAPI.pkg_name`](@ref)`(learner)` | name of package providing core code (may be different from package providing LearnAPI.jl implementation) | `"unknown"` | `"DecisionTree"` | +| [`LearnAPI.pkg_license`](@ref)`(learner)` | name of license of package providing core code | `"unknown"` | `"MIT"` | +| [`LearnAPI.doc_url`](@ref)`(learner)` | url providing documentation of the core code | `"unknown"` | `"https://en.wikipedia.org/wiki/Decision_tree_learning"` | +| [`LearnAPI.load_path`](@ref)`(learner)` | string locating name returned by `LearnAPI.constructor(learner)`, beginning with a package name | "unknown"` | `FastTrees.LearnAPI.DecisionTreeClassifier` | +| [`LearnAPI.is_composite`](@ref)`(learner)` | `true` if one or more properties of `learner` may be a learner | `false` | `true` | +| [`LearnAPI.human_name`](@ref)`(learner)` | human name for the learner; should be a noun | type name with spaces | "elastic net regressor" | +| [`LearnAPI.iteration_parameter`](@ref)`(learner)` | symbolic name of an iteration parameter | `nothing` | :epochs | +| [`LearnAPI.data_interface`](@ref)`(learner)` | Interface implemented by objects returned by [`obs`](@ref) | `Base.HasLength()` (supports `MLUtils.getobs/numobs`) | `Base.SizeUnknown()` (supports `iterate`) | +| [`LearnAPI.fit_observation_scitype`](@ref)`(learner)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `fit(learner, data)` works | `Union{}` | `Tuple{AbstractVector{Continuous}, Continuous}` | +| [`LearnAPI.target_observation_scitype`](@ref)`(learner)` | upper bound on the scitype of each observation of the targget | `Any` | `Continuous` | +| [`LearnAPI.is_static`](@ref)`(learner)` | `true` if `fit` consumes no data | `false` | `true` | ### Derived Traits -The following are provided for convenience but should not be overloaded by new algorithms: +The following are provided for convenience but should not be overloaded by new learners: | trait | return value | example | |:-----------------------------------|:-------------------------------------------------------------------------|:--------| -| `LearnAPI.name(algorithm)` | algorithm type name as string | "PCA" | -| `LearnAPI.is_algorithm(algorithm)` | `true` if `algorithm` is LearnAPI.jl-compliant | `true` | -| `LearnAPI.target(algorithm)` | `true` if `fit` sees a target variable; see [`LearnAPI.target`](@ref) | `false` | -| `LearnAPI.weights(algorithm)` | `true` if `fit` supports per-observation; see [`LearnAPI.weights`](@ref) | `false` | +| `LearnAPI.name(learner)` | learner type name as string | "PCA" | +| `LearnAPI.is_learner(learner)` | `true` if `learner` is LearnAPI.jl-compliant | `true` | +| `LearnAPI.target(learner)` | `true` if `fit` sees a target variable; see [`LearnAPI.target`](@ref) | `false` | +| `LearnAPI.weights(learner)` | `true` if `fit` supports per-observation; see [`LearnAPI.weights`](@ref) | `false` | ## Implementation guide A single-argument trait is declared following this pattern: ```julia -LearnAPI.is_pure_julia(algorithm::MyAlgorithmType) = true +LearnAPI.is_pure_julia(learner::MyLearnerType) = true ``` A shorthand for single-argument traits is available: ```julia -@trait MyAlgorithmType is_pure_julia=true +@trait MyLearnerType 
is_pure_julia=true ``` Multiple traits can be declared like this: @@ -62,7 +62,7 @@ Multiple traits can be declared like this: ```julia @trait( - MyAlgorithmType, + MyLearnerType, is_pure_julia = true, pkg_name = "MyPackage", ) @@ -70,20 +70,20 @@ Multiple traits can be declared like this: ### [The global trait contract](@id trait_contract) -To ensure that trait metadata can be stored in an external algorithm registry, LearnAPI.jl +To ensure that trait metadata can be stored in an external learner registry, LearnAPI.jl requires: -1. *Finiteness:* The value of a trait is the same for all `algorithm`s with same value of - [`LearnAPI.constructor(algorithm)`](@ref). This typically means trait values do not - depend on type parameters! If `is_composite(algorithm) = true`, this requirement is +1. *Finiteness:* The value of a trait is the same for all `learner`s with same value of + [`LearnAPI.constructor(learner)`](@ref). This typically means trait values do not + depend on type parameters! If `is_composite(learner) = true`, this requirement is dropped. 2. *Low level deserializability:* It should be possible to evaluate the trait *value* when `LearnAPI` is the only imported module. -Because of 1, combining a lot of functionality into one algorithm (e.g. the algorithm can +Because of 1, combining a lot of functionality into one learner (e.g. the learner can perform both classification or regression) can mean traits are necessarily less -informative (as in `LearnAPI.target_observation_scitype(algorithm) = Any`). +informative (as in `LearnAPI.target_observation_scitype(learner) = Any`). ## Reference diff --git a/src/LearnAPI.jl b/src/LearnAPI.jl index 74fdd84b..c1564e06 100644 --- a/src/LearnAPI.jl +++ b/src/LearnAPI.jl @@ -1,7 +1,5 @@ module LearnAPI -import InteractiveUtils.subtypes - include("tools.jl") include("types.jl") include("predict_transform.jl") @@ -16,7 +14,7 @@ export @trait export fit, update, update_observations, update_features export predict, transform, inverse_transform, obs -for name in Symbol.(CONCRETE_TARGET_PROXY_TYPES_SYMBOLS) +for name in CONCRETE_TARGET_PROXY_SYMBOLS @eval export $name end diff --git a/src/accessor_functions.jl b/src/accessor_functions.jl index 84859307..bbc713fc 100644 --- a/src/accessor_functions.jl +++ b/src/accessor_functions.jl @@ -9,32 +9,32 @@ const DOC_STATIC = """ - For "static" algorithms (those without training `data`) it may be necessary to first + For "static" learners (those without training `data`) it may be necessary to first call `transform` or `predict` on `model`. """ """ - LearnAPI.algorithm(model) - LearnAPI.algorithm(LearnAPI.stripd_model) + LearnAPI.learner(model) + LearnAPI.learner(LearnAPI.stripd_model) -Recover the algorithm used to train `model` or the output of [`LearnAPI.strip(model)`](@ref). +Recover the learner used to train `model` or the output of [`LearnAPI.strip(model)`](@ref). -In other words, if `model = fit(algorithm, data...)`, for some `algorithm` and `data`, +In other words, if `model = fit(learner, data...)`, for some `learner` and `data`, then ```julia -LearnAPI.algorithm(model) == algorithm == LearnAPI.algorithm(LearnAPI.strip(model)) +LearnAPI.learner(model) == learner == LearnAPI.learner(LearnAPI.strip(model)) ``` is `true`. # New implementations -Implementation is compulsory for new algorithm types. The behaviour described above is the -only contract. $(DOC_IMPLEMENTED_METHODS(":(LearnAPI.algorithm)")) +Implementation is compulsory for new learner types. The behaviour described above is the +only contract. 
$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.learner)")) """ -function algorithm end +function learner end """ LearnAPI.strip(model; options...) @@ -44,16 +44,16 @@ Return a version of `model` that will generally have a smaller memory allocation [`fit`](@ref). Accessor functions that can be called on `model` may not work on `LearnAPI.strip(model)`, but [`predict`](@ref), [`transform`](@ref) and [`inverse_transform`](@ref) will work, if implemented. Check -`LearnAPI.functions(LearnAPI.algorithm(model))` to view see what the original `model` +`LearnAPI.functions(LearnAPI.learner(model))` to view see what the original `model` implements. -Specific algorithms may provide keyword `options` to control how much of the original -functionality is preserved by `LearnAPI.strip`. +Implementations may provide learner-specific keyword `options` to control how much of the +original functionality is preserved by `LearnAPI.strip`. # Typical workflow ```julia -model = fit(algorithm, (X, y)) # or `fit(algorithm, X, y)` +model = fit(learner, (X, y)) # or `fit(learner, X, y)` ŷ = predict(model, Point(), Xnew) small_model = LearnAPI.strip(model) @@ -67,7 +67,7 @@ recovered_model = deserialize("my_random_forest.jls") # New implementations -Overloading `LearnAPI.strip` for new algorithms is optional. The fallback is the +Overloading `LearnAPI.strip` for new learners is optional. The fallback is the identity. New implementations must enforce the following identities, whenever the right-hand side is @@ -94,15 +94,15 @@ LearnAPI.strip(model) = model """ LearnAPI.feature_importances(model) -Return the algorithm-specific feature importances of a `model` output by -[`fit`](@ref)`(algorithm, ...)` for some `algorithm`. The value returned has the form of +Return the learner-specific feature importances of a `model` output by +[`fit`](@ref)`(learner, ...)` for some `learner`. The value returned has the form of an abstract vector of `feature::Symbol => importance::Real` pairs (e.g `[:gender => 0.23, :height => 0.7, :weight => 0.1]`). -The `algorithm` supports feature importances if `:(LearnAPI.feature_importances) in -LearnAPI.functions(algorithm)`. +The `learner` supports feature importances if `:(LearnAPI.feature_importances) in +LearnAPI.functions(learner)`. -If an algorithm is sometimes unable to report feature importances then +If a learner is sometimes unable to report feature importances then `LearnAPI.feature_importances` will return all importances as 0.0, as in `[:gender => 0.0, :height => 0.0, :weight => 0.0]`. @@ -124,7 +124,7 @@ an abstract vector of `feature_or_class::Symbol => coefficient::Real` pairs (e.g `feature::Symbol => coefficients::AbstractVector{<:Real}` pairs. The `model` reports coefficients if `:(LearnAPI.coefficients) in -LearnAPI.functions(Learn.algorithm(model))`. +LearnAPI.functions(Learn.learner(model))`. See also [`LearnAPI.intercept`](@ref). @@ -144,7 +144,7 @@ For a linear model, return the learned intercept. The value returned is `Real` target) or an `AbstractVector{<:Real}` (multi-target). The `model` reports intercept if `:(LearnAPI.intercept) in -LearnAPI.functions(Learn.algorithm(model))`. +LearnAPI.functions(Learn.learner(model))`. See also [`LearnAPI.coefficients`](@ref). @@ -200,8 +200,8 @@ function trees end """ LearnAPI.training_losses(model) -Return the training losses obtained when running `model = fit(algorithm, ...)` for some -`algorithm`. +Return the training losses obtained when running `model = fit(learner, ...)` for some +`learner`. See also [`fit`](@ref). 
@@ -218,8 +218,8 @@ function training_losses end """ LearnAPI.training_predictions(model) -Return internally computed training predictions when running `model = fit(algorithm, ...)` -for some `algorithm`. +Return internally computed training predictions when running `model = fit(learner, ...)` +for some `learner`. See also [`fit`](@ref). @@ -236,14 +236,14 @@ function training_predictions end """ LearnAPI.training_scores(model) -Return the training scores obtained when running `model = fit(algorithm, ...)` for some -`algorithm`. +Return the training scores obtained when running `model = fit(learner, ...)` for some +`learner`. See also [`fit`](@ref). # New implementations -Implement for algorithms, such as outlier detection algorithms, which associate a score +Implement for learners, such as outlier detection algorithms, which associate a score with each observation during training, where these scores are of interest in later processes (e.g, in defining normalized scores for new data). @@ -257,11 +257,11 @@ function training_scores end For a composite `model`, return the component models (`fit` outputs). These will be in the form of a vector of named pairs, `property_name::Symbol => component_model`. Here -`property_name` is the name of some algorithm-valued property (hyper-parameter) of -`algorithm = LearnAPI.algorithm(model)`. +`property_name` is the name of some learner-valued property (hyper-parameter) of +`learner = LearnAPI.learner(model)`. -A composite model is one for which the corresponding `algorithm` includes one or more -algorithm-valued properties, and for which `LearnAPI.is_composite(algorithm)` is `true`. +A composite model is one for which the corresponding `learner` includes one or more +learner-valued properties, and for which `LearnAPI.is_composite(learner)` is `true`. See also [`is_composite`](@ref). @@ -277,8 +277,8 @@ function components end """ LearnAPI.training_labels(model) -Return the training labels obtained when running `model = fit(algorithm, ...)` for some -`algorithm`. +Return the training labels obtained when running `model = fit(learner, ...)` for some +`learner`. See also [`fit`](@ref). @@ -292,7 +292,7 @@ function training_labels end # :extras intentionally excluded: const ACCESSOR_FUNCTIONS_WITHOUT_EXTRAS = ( - algorithm, + learner, coefficients, intercept, tree, @@ -316,8 +316,8 @@ const ACCESSOR_FUNCTIONS_WITHOUT_EXTRAS_LIST = join( """ LearnAPI.extras(model) -Return miscellaneous byproducts of an algorithm's computation, from the object `model` -returned by a call of the form `fit(algorithm, data)`. +Return miscellaneous byproducts of a learning algorithm's execution, from the +object `model` returned by a call of the form `fit(learner, data)`. $DOC_STATIC diff --git a/src/clone.jl b/src/clone.jl index 571ea7fe..fef6515d 100644 --- a/src/clone.jl +++ b/src/clone.jl @@ -1,23 +1,23 @@ """ - LearnAPI.clone(algorithm; replacements...) + LearnAPI.clone(learner; replacements...) -Return a shallow copy of `algorithm` with the specified hyperparameter replacements. +Return a shallow copy of `learner` with the specified hyperparameter replacements. ```julia -clone(algorithm; epochs=100, learning_rate=0.01) +clone(learner; epochs=100, learning_rate=0.01) ``` -It is guaranteed that `LearnAPI.clone(algorithm) == algorithm`. +It is guaranteed that `LearnAPI.clone(learner) == learner`. """ -function clone(algorithm; replacements...) +function clone(learner; replacements...) 
reps = NamedTuple(replacements) - names = propertynames(algorithm) + names = propertynames(learner) rep_names = keys(reps) new_values = map(names) do name name in rep_names && return getproperty(reps, name) - getproperty(algorithm, name) + getproperty(learner, name) end - return LearnAPI.constructor(algorithm)(NamedTuple{names}(new_values)...) + return LearnAPI.constructor(learner)(NamedTuple{names}(new_values)...) end diff --git a/src/fit_update.jl b/src/fit_update.jl index 96407651..2421acba 100644 --- a/src/fit_update.jl +++ b/src/fit_update.jl @@ -1,22 +1,23 @@ # # FIT """ - fit(algorithm, data; verbosity=1) - fit(algorithm; verbosity=1) + fit(learner, data; verbosity=1) + fit(learner; verbosity=1) -Execute the algorithm with configuration `algorithm` using the provided training `data`, -returning an object, `model`, on which other methods, such as [`predict`](@ref) or -[`transform`](@ref), can be dispatched. [`LearnAPI.functions(algorithm)`](@ref) returns a -list of methods that can be applied to either `algorithm` or `model`. +Execute the machine learning or statistical algorithm with configuration `learner` using +the provided training `data`, returning an object, `model`, on which other methods, such +as [`predict`](@ref) or [`transform`](@ref), can be dispatched. +[`LearnAPI.functions(learner)`](@ref) returns a list of methods that can be applied to +either `learner` or `model`. For example, a supervised classifier might have a workflow like this: ```julia -model = fit(algorithm, (X, y)) +model = fit(learner, (X, y)) ŷ = predict(model, Xnew) ``` -The second signature, with `data` omitted, is provided by algorithms that do not +The second signature, with `data` omitted, is provided by learners that do not generalize to new observations (called *static algorithms*). In that case, `transform(model, data)` or `predict(model, ..., data)` carries out the actual algorithm execution, writing any byproducts of that operation to the mutable object `model` returned @@ -31,7 +32,7 @@ See also [`predict`](@ref), [`transform`](@ref), [`inverse_transform`](@ref), # New implementations -Implementation of exactly one of the signatures is compulsory. If `fit(algorithm; +Implementation of exactly one of the signatures is compulsory. If `fit(learner; verbosity=1)` is implemented, then the trait [`LearnAPI.is_static`](@ref) must be overloaded to return `true`. @@ -45,9 +46,9 @@ these methods. A fallback returns `first(data)` if `data` is a tuple, and `data` otherwise. The LearnAPI.jl specification has nothing to say regarding `fit` signatures with more than -two arguments. For convenience, for example, an algorithm is free to implement a slurping -signature, such as `fit(algorithm, X, y, extras...) = fit(algorithm, (X, y, extras...))` but -LearnAPI.jl does not guarantee such signatures are actually implemented. +two arguments. For convenience, for example, an implementation is free to implement a +slurping signature, such as `fit(learner, X, y, extras...) = fit(learner, (X, y, +extras...))` but LearnAPI.jl does not guarantee such signatures are actually implemented. $(DOC_DATA_INTERFACE(:fit)) @@ -65,10 +66,10 @@ Return an updated version of the `model` object returned by a previous [`fit`](@ p2=value2, ...`. 
```julia -algorithm = MyForest(ntrees=100) +learner = MyForest(ntrees=100) # train with 100 trees: -model = fit(algorithm, data) +model = fit(learner, data) # add 50 more trees: model = update(model, data; ntrees=150) @@ -76,13 +77,13 @@ model = update(model, data; ntrees=150) Provided that `data` is identical with the data presented in a preceding `fit` call *and* there is at most one hyperparameter replacement, as in the above example, execution is -semantically equivalent to the call `fit(algorithm, data)`, where `algorithm` is -`LearnAPI.algorithm(model)` with the specified replacements. In some cases (typically, +semantically equivalent to the call `fit(learner, data)`, where `learner` is +`LearnAPI.learner(model)` with the specified replacements. In some cases (typically, when changing an iteration parameter) there may be a performance benefit to using `update` instead of retraining ab initio. If `data` differs from that in the preceding `fit` or `update` call, or there is more than -one hyperparameter replacement, then behaviour is algorithm-specific. +one hyperparameter replacement, then behaviour is learner-specific. See also [`fit`](@ref), [`update_observations`](@ref), [`update_features`](@ref). @@ -104,19 +105,19 @@ Return an updated version of the `model` object returned by a previous [`fit`](@ specify hyperparameter replacements in the form `p1=value1, p2=value2, ...`. ```julia-repl -algorithm = MyNeuralNetwork(epochs=10, learning_rate=0.01) +learner = MyNeuralNetwork(epochs=10, learning_rate=0.01) # train for ten epochs: -model = fit(algorithm, data) +model = fit(learner, data) # train for two more epochs using new data and new learning rate: model = update_observations(model, new_data; epochs=2, learning_rate=0.1) ``` -When following the call `fit(algorithm, data)`, the `update` call is semantically +When following the call `fit(learner, data)`, the `update` call is semantically equivalent to retraining ab initio using a concatenation of `data` and `new_data`, *provided there are no hyperparameter replacements* (which rules out the example -above). Behaviour is otherwise algorithm-specific. +above). Behaviour is otherwise learner-specific. See also [`fit`](@ref), [`update`](@ref), [`update_features`](@ref). @@ -139,10 +140,10 @@ Return an updated version of the `model` object returned by a previous [`fit`](@ `update` call given the new features encapsulated in `new_data`. One may additionally specify hyperparameter replacements in the form `p1=value1, p2=value2, ...`. -When following the call `fit(algorithm, data)`, the `update` call is semantically +When following the call `fit(learner, data)`, the `update` call is semantically equivalent to retraining ab initio using a concatenation of `data` and `new_data`, *provided there are no hyperparameter replacements.* Behaviour is otherwise -algorithm-specific. +learner-specific. See also [`fit`](@ref), [`update`](@ref), [`update_features`](@ref). diff --git a/src/obs.jl b/src/obs.jl index 8b226211..d107fa77 100644 --- a/src/obs.jl +++ b/src/obs.jl @@ -1,14 +1,14 @@ """ - obs(algorithm, data) + obs(learner, data) obs(model, data) -Return an algorithm-specific representation of `data`, suitable for passing to `fit` +Return learner-specific representation of `data`, suitable for passing to `fit` (first signature) or to `predict` and `transform` (second signature), in place of -`data`. Here `model` is the return value of `fit(algorithm, ...)` for some LearnAPI.jl -algorithm, `algorithm`. +`data`. 
Here `model` is the return value of `fit(learner, ...)` for some LearnAPI.jl +learner, `learner`. The returned object is guaranteed to implement observation access as indicated by -[`LearnAPI.data_interface(algorithm)`](@ref), typically +[`LearnAPI.data_interface(learner)`](@ref), typically [`LearnAPI.RandomAccess()`](@ref). Calling `fit`/`predict`/`transform` on the returned objects may have performance @@ -23,18 +23,18 @@ Usual workflow, using data-specific resampling methods: ```julia data = (X, y) # a DataFrame and a vector data_train = (Tables.select(X, 1:100), y[1:100]) -model = fit(algorithm, data_train) +model = fit(learner, data_train) ŷ = predict(model, Point(), X[101:150]) ``` -Alternative workflow using `obs` and the MLUtils.jl method `getobs` (assumes -`LearnAPI.data_interface(algorithm) == RandomAccess()`): +Alternative, data agnostic, workflow using `obs` and the MLUtils.jl method `getobs` +(assumes `LearnAPI.data_interface(learner) == RandomAccess()`): ```julia import MLUtils -fit_observations = obs(algorithm, data) -model = fit(algorithm, MLUtils.getobs(fit_observations, 1:100)) +fit_observations = obs(learner, data) +model = fit(learner, MLUtils.getobs(fit_observations, 1:100)) predict_observations = obs(model, X) ẑ = predict(model, Point(), MLUtils.getobs(predict_observations, 101:150)) @@ -50,15 +50,15 @@ See also [`LearnAPI.data_interface`](@ref). Implementation is typically optional. -For each supported form of `data` in `fit(algorithm, data)`, it must be true that `model = -fit(algorithm, observations)` is equivalent to `model = fit(algorithm, data)`, whenever -`observations = obs(algorithm, data)`. For each supported form of `data` in calls +For each supported form of `data` in `fit(learner, data)`, it must be true that `model = +fit(learner, observations)` is equivalent to `model = fit(learner, data)`, whenever +`observations = obs(learner, data)`. For each supported form of `data` in calls `predict(model, ..., data)` and `transform(model, data)`, where implemented, the calls `predict(model, ..., observations)` and `transform(model, observations)` are supported alternatives, whenever `observations = obs(model, data)`. -The fallback for `obs` is `obs(model_or_algorithm, data) = data`, and the fallback for -`LearnAPI.data_interface(algorithm)` is `LearnAPI.RandomAccess()`. For details refer to +The fallback for `obs` is `obs(model_or_learner, data) = data`, and the fallback for +`LearnAPI.data_interface(learner)` is `LearnAPI.RandomAccess()`. For details refer to the [`LearnAPI.data_interface`](@ref) document string. In particular, if the `data` to be consumed by `fit`, `predict` or `transform` consists @@ -66,9 +66,9 @@ only of suitable tables and arrays, then `obs` and `LearnAPI.data_interface` do to be overloaded. However, the user will get no performance benefits by using `obs` in that case. -When overloading `obs(algorithm, data)` to output new model-specific representations of +When overloading `obs(learner, data)` to output new model-specific representations of data, it may be necessary to also overload [`LearnAPI.features`](@ref), -[`LearnAPI.target`](@ref) (supervised algorithms), and/or [`LearnAPI.weights`](@ref) (if +[`LearnAPI.target`](@ref) (supervised learners), and/or [`LearnAPI.weights`](@ref) (if weights are supported), for extracting relevant parts of the representation. 
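For instance, a learner preferring a matrix representation of tabular input might overload these methods as in the following sketch (`MyLearner` and `MyObs` are hypothetical; the observation-access methods are only indicated in a comment):

```julia
import LearnAPI
using Tables

struct MyLearner end   # hypothetical learner

# hypothetical container for pre-processed training data:
struct MyObs
    X::Matrix{Float64}  # features, observations as columns
    y::Vector{Float64}  # target
end

# `data` assumed to be a `(table, target_vector)` tuple:
function LearnAPI.obs(::MyLearner, data::Tuple)
    X, y = data
    MyObs(permutedims(Tables.matrix(X)), float(y))
end

# extract the relevant parts of the new representation:
LearnAPI.features(::MyLearner, observations::MyObs) = observations.X
LearnAPI.target(::MyLearner, observations::MyObs) = observations.y

# (`MyObs` would additionally need `Base.getindex` and `MLUtils.numobs` methods to
# honor the default `RandomAccess` data interface.)
```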
## Sample implementation

@@ -78,4 +78,4 @@ Refer to the "Anatomy of an Implementation" section of the LearnAPI.jl

"""
-obs(algorithm_or_model, data) = data
+obs(learner_or_model, data) = data
diff --git a/src/predict_transform.jl b/src/predict_transform.jl
index 726f263f..8bb0a254 100644
--- a/src/predict_transform.jl
+++ b/src/predict_transform.jl
@@ -7,7 +7,7 @@ end
 DOC_MUTATION(op) =
    """
-    If [`LearnAPI.is_static(algorithm)`](@ref) is `true`, then `$op` may mutate it's first
+    If [`LearnAPI.is_static(learner)`](@ref) is `true`, then `$op` may mutate its first
    argument, but not in a way that alters the result of a subsequent call to `predict`,
    `transform` or `inverse_transform`. See more at [`fit`](@ref).

@@ -16,9 +16,9 @@ DOC_MUTATION(op) =
 DOC_SLURPING(op) =
    """
-    An algorithm is free to implement `$op` signatures with additional positional
-    arguments (eg., data-slurping signatures) but LearnAPI.jl is silent about their
-    interpretation or existence.
+    An implementation is free to implement `$op` signatures with additional positional
+    arguments (e.g., data-slurping signatures) but LearnAPI.jl is silent about their
+    interpretation or existence.

    """

@@ -29,7 +29,7 @@ DOC_MINIMIZE(func) =
    identity must hold:

    ```julia
-    $func(LearnAPI.strip(model), args...) = $func(model, args...)
+    $func(LearnAPI.strip(model), args...) == $func(model, args...)
    ```

    """

@@ -41,11 +41,11 @@ DOC_DATA_INTERFACE(method) =

    By default, it is assumed that `data` supports the [`LearnAPI.RandomAccess`](@ref)
    interface; this includes all matrices, with observations-as-columns, most tables, and
-    tuples thereof). See [`LearnAPI.RandomAccess`](@ref) for details. If this is not the
+    tuples thereof. See [`LearnAPI.RandomAccess`](@ref) for details. If this is not the
    case then an implementation must either: (i) overload [`obs`](@ref) to articulate how
    provided data can be transformed into a form that does support
    [`LearnAPI.RandomAccess`](@ref); or (ii) overload the trait
-    [`LearnAPI.data_interface`](@ref) to specify a more relaxed data API. Refer to
+    [`LearnAPI.data_interface`](@ref) to specify a more relaxed data API. Refer to
    document strings for details.

    """

@@ -61,21 +61,21 @@ The first signature returns target predictions, or proxies for target prediction
 input features `data`, according to some `model` returned by [`fit`](@ref). Where
 supported, these are literally target predictions if `kind_of_proxy = Point()`, and
 probability density/mass functions if `kind_of_proxy = Distribution()`. List all
-options with [`LearnAPI.kinds_of_proxy(algorithm)`](@ref), where `algorithm =
-LearnAPI.algorithm(model)`.
+options with [`LearnAPI.kinds_of_proxy(learner)`](@ref), where `learner =
+LearnAPI.learner(model)`.

```julia
-model = fit(algorithm, (X, y))
+model = fit(learner, (X, y))
 predict(model, Point(), Xnew)
```

-The shortcut `predict(model, data)` calls the first method with an algorithm-specific
-`kind_of_proxy`, namely the first element of [`LearnAPI.kinds_of_proxy(algorithm)`](@ref),
+The shortcut `predict(model, data)` calls the first method with a learner-specific
+`kind_of_proxy`, namely the first element of [`LearnAPI.kinds_of_proxy(learner)`](@ref),
 which lists all supported target proxies.

-The argument `model` is anything returned by a call of the form `fit(algorithm, ...)`.
+The argument `model` is anything returned by a call of the form `fit(learner, ...)`.
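For example, for a hypothetical learner supporting both point and distribution predictions (variables as in the snippet above):

```julia
model = fit(learner, (X, y))
predict(model, Point(), Xnew)         # point predictions
predict(model, Distribution(), Xnew)  # predicted probability distributions
predict(model, Xnew)                  # shortcut, using the first listed kind of proxy
```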
-If `LearnAPI.features(LearnAPI.algorithm(model)) == nothing`, then argument `data` is +If `LearnAPI.features(LearnAPI.learner(model)) == nothing`, then the argument `data` is omitted in both signatures. An example is density estimators. See also [`fit`](@ref), [`transform`](@ref), [`inverse_transform`](@ref). @@ -83,7 +83,7 @@ See also [`fit`](@ref), [`transform`](@ref), [`inverse_transform`](@ref). # Extended help Note `predict ` must not mutate any argument, except in the special case -`LearnAPI.is_static(algorithm) == true`. +`LearnAPI.is_static(learner) == true`. # New implementations @@ -91,11 +91,13 @@ If there is no notion of a "target" variable in the LearnAPI.jl sense, or you ne operation with an inverse, implement [`transform`](@ref) instead. Implementation is optional. Only the first signature (with or without the `data` argument) -is implemented, but each `kind_of_proxy` that gets an implementation must be added to the -list returned by [`LearnAPI.kinds_of_proxy`](@ref). +is implemented, but each `kind_of_proxy::`[`KindOfProxy`](@ref) that gets an +implementation must be added to the list returned by +[`LearnAPI.kinds_of_proxy(learner)`](@ref). List all available kinds of proxy by doing +`LearnAPI.kinds_of_proxy()`. If `data` is not present in the implemented signature (eg., for density estimators) then -[`LearnAPI.features(algorithm, data)`](@ref) must return `nothing`. +[`LearnAPI.features(learner, data)`](@ref) must return `nothing`. $(DOC_IMPLEMENTED_METHODS(":(LearnAPI.predict)")) @@ -106,8 +108,8 @@ $(DOC_MUTATION(:predict)) $(DOC_DATA_INTERFACE(:predict)) """ -predict(model, data) = predict(model, kinds_of_proxy(algorithm(model)) |> first, data) -predict(model) = predict(model, kinds_of_proxy(algorithm(model)) |> first) +predict(model, data) = predict(model, kinds_of_proxy(learner(model)) |> first, data) +predict(model) = predict(model, kinds_of_proxy(learner(model)) |> first) """ transform(model, data) @@ -119,28 +121,34 @@ Return a transformation of some `data`, using some `model`, as returned by Below, `X` and `Xnew` are data of the same form. -For an `algorithm` that generalizes to new data ("learns"): +For a `learner` that generalizes to new data ("learns"): ```julia -model = fit(algorithm, X; verbosity=0) +model = fit(learner, X; verbosity=0) transform(model, Xnew) ``` +or, in one step (where supported): + +```julia +W = transform(learner, X) # `fit` implied +``` + For a static (non-generalizing) transformer: ```julia -model = fit(algorithm) +model = fit(learner) W = transform(model, X) ``` or, in one step (where supported): ```julia -W = transform(algorithm, X) +W = transform(learner, X) # `fit` implied ``` Note `transform` does not mutate any argument, except in the special case -`LearnAPI.is_static(algorithm) == true`. +`LearnAPI.is_static(learner) == true`. See also [`fit`](@ref), [`predict`](@ref), [`inverse_transform`](@ref). @@ -149,7 +157,7 @@ See also [`fit`](@ref), [`predict`](@ref), # New implementations -Implementation for new LearnAPI.jl algorithms is +Implementation for new LearnAPI.jl learners is optional. $(DOC_IMPLEMENTED_METHODS(":(LearnAPI.transform)")) $(DOC_SLURPING(:transform)) @@ -169,15 +177,15 @@ function transform end Inverse transform `data` according to some `model` returned by [`fit`](@ref). Here "inverse" is to be understood broadly, e.g, an approximate -right inverse for [`transform`](@ref). +right or left inverse for [`transform`](@ref). 
# Example -In the following, `algorithm` is some dimension-reducing algorithm that generalizes to new +In the following, `learner` is some dimension-reducing algorithm that generalizes to new data (such as PCA); `Xtrain` is the training input and `Xnew` the input to be reduced: ```julia -model = fit(algorithm, Xtrain) +model = fit(learner, Xtrain) W = transform(model, Xnew) # reduced version of `Xnew` Ŵ = inverse_transform(model, W) # embedding of `W` in original space ``` diff --git a/src/target_weights_features.jl b/src/target_weights_features.jl index 58243030..aee3481a 100644 --- a/src/target_weights_features.jl +++ b/src/target_weights_features.jl @@ -1,9 +1,9 @@ """ - LearnAPI.target(algorithm, data) -> target + LearnAPI.target(learner, data) -> target -Return, for each form of `data` supported in a call of the form [`fit(algorithm, +Return, for each form of `data` supported in a call of the form [`fit(learner, data)`](@ref), the target variable part of `data`. If `nothing` is returned, the -`algorithm` does not see a target variable in training (is unsupervised). +`learner` does not see a target variable in training (is unsupervised). Refer to LearnAPI.jl documentation for the precise meaning of "target". @@ -18,9 +18,9 @@ $(DOC_IMPLEMENTED_METHODS(":(LearnAPI.target)"; overloaded=true)) target(::Any, data) = nothing """ - LearnAPI.weights(algorithm, data) -> weights + LearnAPI.weights(learner, data) -> weights -Return, for each form of `data` supported in a call of the form [`fit(algorithm, +Return, for each form of `data` supported in a call of the form [`fit(learner, data)`](@ref), the per-observation weights part of `data`. Where `nothing` is returned, no weights are part of `data`, which is to be interpreted as uniform weighting. @@ -34,9 +34,9 @@ $(DOC_IMPLEMENTED_METHODS(":(LearnAPI.weights)"; overloaded=true)) weights(::Any, data) = nothing """ - LearnAPI.features(algorithm, data) + LearnAPI.features(learner, data) -Return, for each form of `data` supported in a call of the form [`fit(algorithm, +Return, for each form of `data` supported in a call of the form [`fit(learner, data)`](@ref), the "features" part of `data` (as opposed to the target variable, for example). @@ -44,14 +44,14 @@ The returned object `X` may always be passed to `predict` or `transform`, where implemented, as in the following sample workflow: ```julia -model = fit(algorithm, data) +model = fit(learner, data) X = features(data) -ŷ = predict(algorithm, kind_of_proxy, X) # eg, `kind_of_proxy = Point()` +ŷ = predict(learner, kind_of_proxy, X) # eg, `kind_of_proxy = Point()` ``` The returned object has the same number of observations as `data`. For supervised models -(i.e., where `:(LearnAPI.target) in LearnAPI.functions(algorithm)`) `ŷ` above is generally -intended to be an approximate proxy for `LearnAPI.target(algorithm, data)`, the training +(i.e., where `:(LearnAPI.target) in LearnAPI.functions(learner)`) `ŷ` above is generally +intended to be an approximate proxy for `LearnAPI.target(learner, data)`, the training target. @@ -61,13 +61,13 @@ That the output can be passed to `predict` and/or `transform`, and has the same observations as `data`, are the only contracts. A fallback returns `first(data)` if `data` is a tuple, and otherwise returns `data`. -Overloading may be necessary if [`obs(algorithm, data)`](@ref) is overloaded to return -some algorithm-specific representation of training `data`. 
For density estimators, whose +Overloading may be necessary if [`obs(learner, data)`](@ref) is overloaded to return +some learner-specific representation of training `data`. For density estimators, whose `fit` typically consumes *only* a target variable, you should overload this method to return `nothing`. """ -features(algorithm, data) = _first(data) +features(learner, data) = _first(data) _first(data) = data _first(data::Tuple) = first(data) # note the factoring above guards against method ambiguities diff --git a/src/tools.jl b/src/tools.jl index 1b033f05..731860ff 100644 --- a/src/tools.jl +++ b/src/tools.jl @@ -9,9 +9,9 @@ function name_value_pair(ex) end """ - @trait(TypeEx, trait1=value1, trait2=value2, ...) + @trait(LearnerType, trait1=value1, trait2=value2, ...) -Overload a number of traits for algorithms of type `TypeEx`. For example, the code +Overload a number of traits for learners of type `LearnerType`. For example, the code ```julia @trait( @@ -29,13 +29,13 @@ LearnAPI.doc_url(::RidgeRegressor) = "https://some.cool.documentation", ``` """ -macro trait(algorithm_ex, exs...) +macro trait(learner_ex, exs...) program = quote end for ex in exs trait_ex, value_ex = name_value_pair(ex) push!( program.args, - :($LearnAPI.$trait_ex(::$algorithm_ex) = $value_ex), + :($LearnAPI.$trait_ex(::$learner_ex) = $value_ex), ) end return esc(program) diff --git a/src/traits.jl b/src/traits.jl index 9b566120..6886a2ec 100644 --- a/src/traits.jl +++ b/src/traits.jl @@ -1,17 +1,17 @@ # There are two types of traits - ordinary traits that an implementation overloads to make -# promises of algorithm behavior, and derived traits, which are never overloaded. +# promises of learner behavior, and derived traits, which are never overloaded. const DOC_UNKNOWN = - "Returns `\"unknown\"` if the algorithm implementation has "* + "Returns `\"unknown\"` if the learner implementation has "* "not overloaded the trait. " -const DOC_ON_TYPE = "The value of the trait must depend only on the type of `algorithm`. " +const DOC_ON_TYPE = "The value of the trait must depend only on the type of `learner`. " const DOC_EXPLAIN_EACHOBS = """ Here, "for each `o` in `observations`" is understood in the sense of - [`LearnAPI.data_interface(algorithm)`](@ref). For example, if - `LearnAPI.data_interface(algorithm) == Base.HasLength()`, then this means "for `o` in + [`LearnAPI.data_interface(learner)`](@ref). For example, if + `LearnAPI.data_interface(learner) == Base.HasLength()`, then this means "for `o` in `MLUtils.eachobs(observations)`". """ @@ -19,16 +19,16 @@ const DOC_EXPLAIN_EACHOBS = # # OVERLOADABLE TRAITS """ - Learn.API.constructor(algorithm) + Learn.API.constructor(learner) -Return a keyword constructor that can be used to clone `algorithm`: +Return a keyword constructor that can be used to clone `learner`: ```julia-repl -julia> algorithm.lambda +julia> learner.lambda 0.1 -julia> C = LearnAPI.constructor(algorithm) -julia> algorithm2 = C(lambda=0.2) -julia> algorithm2.lambda +julia> C = LearnAPI.constructor(learner) +julia> learner2 = C(lambda=0.2) +julia> learner2.lambda 0.2 ``` @@ -36,21 +36,21 @@ julia> algorithm2.lambda All new implementations must overload this trait. -Attach public LearnAPI.jl-related documentation for an algorithm to the constructor, not -the algorithm struct. +Attach public LearnAPI.jl-related documentation for learner to the constructor, not +the learner struct. 
-It must be possible to recover an algorithm from the constructor returned as follows: +It must be possible to recover learner from the constructor returned as follows: ```julia -properties = propertynames(algorithm) -named_properties = NamedTuple{properties}(getproperty.(Ref(algorithm), properties)) -@assert algorithm == LearnAPI.constructor(algorithm)(; named_properties...) +properties = propertynames(learner) +named_properties = NamedTuple{properties}(getproperty.(Ref(learner), properties)) +@assert learner == LearnAPI.constructor(learner)(; named_properties...) ``` -which can be tested with `@assert LearnAPI.clone(algorithm) == algorithm`. +which can be tested with `@assert LearnAPI.clone(learner) == learner`. The keyword constructor provided by `LearnAPI.constructor` must provide default values for -all properties, with the exception of those that can take other LearnAPI.jl algorithms as +all properties, with the exception of those that can take other LearnAPI.jl learners as values. These can be provided with the default `nothing`, with the constructor throwing an error if the default value persists. @@ -58,17 +58,17 @@ error if the default value persists. function constructor end """ - LearnAPI.functions(algorithm) + LearnAPI.functions(learner) Return a tuple of expressions representing functions that can be meaningfully applied -with `algorithm`, or an associated model (object returned by `fit(algorithm, ...)`, as the -first argument. Algorithm traits (methods for which `algorithm` is the *only* argument) +with `learner`, or an associated model (object returned by `fit(learner, ...)`, as the +first argument. Learner traits (methods for which `learner` is the *only* argument) are excluded. The returned tuple may include expressions like `:(DecisionTree.print_tree)`, which reference functions not owned by LearnAPI.jl. -The understanding is that `algorithm` is a LearnAPI-compliant object whenever the return +The understanding is that `learner` is a LearnAPI-compliant object whenever the return value is non-empty. # Extended help @@ -81,7 +81,7 @@ return value: | expression | implementation compulsory? | include in returned tuple? | |-----------------------------------|----------------------------|------------------------------------| | `:(LearnAPI.fit)` | yes | yes | -| `:(LearnAPI.algorithm)` | yes | yes | +| `:(LearnAPI.learner)` | yes | yes | | `:(LearnAPI.strip)` | no | yes | | `:(LearnAPI.obs)` | no | yes | | `:(LearnAPI.features)` | no | yes, unless `fit` consumes no data | @@ -96,13 +96,13 @@ return value: | < accessor functions> | no | only if implemented | Also include any implemented accessor functions, both those owned by LearnaAPI.jl, and any -algorithm-specific ones. The LearnAPI.jl accessor functions are: $ACCESSOR_FUNCTIONS_LIST +learner-specific ones. The LearnAPI.jl accessor functions are: $ACCESSOR_FUNCTIONS_LIST (`LearnAPI.strip` is always included). """ functions() = ( :(LearnAPI.fit), - :(LearnAPI.algorithm), + :(LearnAPI.learner), :(LearnAPI.strip), :(LearnAPI.obs), :(LearnAPI.features), @@ -117,9 +117,9 @@ functions() = ( ) """ - LearnAPI.kinds_of_proxy(algorithm) + LearnAPI.kinds_of_proxy(learner) -Returns a tuple of all instances, `kind`, for which for which `predict(algorithm, kind, +Returns a tuple of all instances, `kind`, for which for which `predict(learner, kind, data...)` has a guaranteed implementation. Each such `kind` subtypes [`LearnAPI.KindOfProxy`](@ref). 
Examples are `Point()` (for predicting actual target values) and `Distributions()` (for predicting probability mass/density functions). @@ -135,21 +135,20 @@ See also [`LearnAPI.predict`](@ref), [`LearnAPI.KindOfProxy`](@ref). Must be overloaded whenever `predict` is implemented. -Elements of the returned tuple must be instances of types in the return value of -`LearnAPI.kinds_of_proxy()`, i.e., one of the following, described further in LearnAPI.jl -documentation: $CONCRETE_TARGET_PROXY_TYPES_LIST. +Elements of the returned tuple must be instances of [`LearnAPI.KindOfProxy`](@ref). List +all possibilities by running `LearnAPI.kinds_of_proxy()`. Suppose, for example, we have the following implementation of a supervised learner returning only probabilistic predictions: ```julia -LearnAPI.predict(algorithm::MyNewAlgorithmType, LearnAPI.Distribution(), Xnew) = ... +LearnAPI.predict(learner::MyNewLearnerType, LearnAPI.Distribution(), Xnew) = ... ``` Then we can declare ```julia -@trait MyNewAlgorithmType kinds_of_proxy = (LearnaAPI.Distribution(),) +@trait MyNewLearnerType kinds_of_proxy = (LearnaAPI.Distribution(),) ``` LearnAPI.jl provides the fallback for `predict(model, data)`. @@ -158,7 +157,12 @@ For more on target variables and target proxies, refer to the LearnAPI documenta """ kinds_of_proxy(::Any) = () -kinds_of_proxy() = CONCRETE_TARGET_PROXY_TYPES +kinds_of_proxy() = map(CONCRETE_TARGET_PROXY_SYMBOLS) do ex + quote + $ex() + end |> eval +end + tags() = [ @@ -166,7 +170,7 @@ tags() = [ "classification", "clustering", "gradient descent", - "iterative algorithms", + "iterative learners", "incremental algorithms", "feature engineering", "dimension reduction", @@ -189,9 +193,9 @@ tags() = [ ] """ - LearnAPI.tags(algorithm) + LearnAPI.tags(learner) -Lists one or more suggestive algorithm tags. Do `LearnAPI.tags()` to list +Lists one or more suggestive learner tags. Do `LearnAPI.tags()` to list all possible. !!! warning @@ -206,9 +210,9 @@ This trait should return a tuple of strings, as in `("classifier", "text analysi tags(::Any) = () """ - LearnAPI.is_pure_julia(algorithm) + LearnAPI.is_pure_julia(learner) -Returns `true` if training `algorithm` requires evaluation of pure Julia code only. +Returns `true` if training `learner` requires evaluation of pure Julia code only. # New implementations @@ -218,10 +222,10 @@ The fallback is `false`. is_pure_julia(::Any) = false """ - LearnAPI.pkg_name(algorithm) + LearnAPI.pkg_name(learner) Return the name of the package module which supplies the core training algorithm for -`algorithm`. This is not necessarily the package providing the LearnAPI +`learner`. This is not necessarily the package providing the LearnAPI interface. $DOC_UNKNOWN @@ -234,18 +238,18 @@ Must return a string, as in `"DecisionTree"`. pkg_name(::Any) = "unknown" """ - LearnAPI.pkg_license(algorithm) + LearnAPI.pkg_license(learner) Return the name of the software license, such as `"MIT"`, applying to the package where the -core algorithm for `algorithm` is implemented. +core algorithm for `learner` is implemented. """ pkg_license(::Any) = "unknown" """ - LearnAPI.doc_url(algorithm) + LearnAPI.doc_url(learner) -Return a url where the core algorithm for `algorithm` is documented. +Return a url where the core algorithm for `learner` is documented. 
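For example (hypothetical learner type and URL):

```julia
@trait MyLearnerType doc_url = "https://example.com/docs/my_learner"
```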
$DOC_UNKNOWN @@ -257,11 +261,11 @@ Must return a string, such as `"https://en.wikipedia.org/wiki/Decision_tree_lear doc_url(::Any) = "unknown" """ - LearnAPI.load_path(algorithm) + LearnAPI.load_path(learner) -Return a string indicating where in code the definition of the algorithm's constructor can +Return a string indicating where in code the definition of the learner's constructor can be found, beginning with the name of the package module defining it. By "constructor" we -mean the return value of [`LearnAPI.constructor(algorithm)`](@ref). +mean the return value of [`LearnAPI.constructor(learner)`](@ref). # Implementation @@ -271,7 +275,7 @@ following julia code will not error: ```julia import FastTrees import LearnAPI -@assert FastTrees.LearnAPI.DecisionTreeClassifier == LearnAPI.constructor(algorithm) +@assert FastTrees.LearnAPI.DecisionTreeClassifier == LearnAPI.constructor(learner) ``` $DOC_UNKNOWN @@ -282,18 +286,18 @@ load_path(::Any) = "unknown" """ - LearnAPI.is_composite(algorithm) + LearnAPI.is_composite(learner) -Returns `true` if one or more properties (fields) of `algorithm` may themselves be -algorithms, and `false` otherwise. +Returns `true` if one or more properties (fields) of `learner` may themselves be +learners, and `false` otherwise. See also [`LearnAPI.components`](@ref). # New implementations -This trait should be overloaded if one or more properties (fields) of `algorithm` may take -algorithm values. Fallback return value is `false`. The keyword constructor for such an -algorithm need not prescribe defaults for algorithm-valued properties. Implementation of +This trait should be overloaded if one or more properties (fields) of `learner` may take +learner values. Fallback return value is `false`. The keyword constructor for such an +learner need not prescribe defaults for learner-valued properties. Implementation of the accessor function [`LearnAPI.components`](@ref) is recommended. $DOC_ON_TYPE @@ -303,9 +307,9 @@ $DOC_ON_TYPE is_composite(::Any) = false """ - LearnAPI.human_name(algorithm) + LearnAPI.human_name(learner) -Return a human-readable string representation of `typeof(algorithm)`. Primarily intended +Return a human-readable string representation of `typeof(learner)`. Primarily intended for auto-generation of documentation. # New implementations @@ -316,14 +320,14 @@ to return `"K-nearest neighbors regressor"`. Ideally, this is a "concrete" noun `"ridge regressor"` rather than an "abstract" noun like `"ridge regression"`. """ -human_name(algorithm) = snakecase(name(algorithm), delim=' ') # `name` defined below +human_name(learner) = snakecase(name(learner), delim=' ') # `name` defined below """ - LearnAPI.data_interface(algorithm) + LearnAPI.data_interface(learner) -Return the data interface supported by `algorithm` for accessing individual observations -in representations of input data returned by [`obs(algorithm, data)`](@ref) or -[`obs(model, data)`](@ref), whenever `algorithm == LearnAPI.algorithm(model)`. Here `data` +Return the data interface supported by `learner` for accessing individual observations +in representations of input data returned by [`obs(learner, data)`](@ref) or +[`obs(model, data)`](@ref), whenever `learner == LearnAPI.learner(model)`. Here `data` is `fit`, `predict`, or `transform`-consumable data. Possible return values are [`LearnAPI.RandomAccess`](@ref), @@ -340,17 +344,17 @@ tables, and tuples of these. See the doc-string for details. 
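For example, a learner consuming data that supports only basic iteration might declare (hypothetical type name):

```julia
@trait MyStreamingLearner data_interface = LearnAPI.Iterable()
```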
data_interface(::Any) = LearnAPI.RandomAccess() """ - LearnAPI.is_static(algorithm) + LearnAPI.is_static(learner) Returns `true` if [`fit`](@ref) is called with no data arguments, as in -`fit(algorithm)`. That is, `algorithm` does not generalize to new data, and data is only +`fit(learner)`. That is, `learner` does not generalize to new data, and data is only provided at the `predict` or `transform` step. For example, some clustering algorithms are applied with this workflow, to assign labels to the observations in `X`: ```julia -model = fit(algorithm) # no training data +model = fit(learner) # no training data labels = predict(model, X) # may mutate `model`! # extract some byproducts of the clustering algorithm (e.g., outliers): @@ -366,9 +370,9 @@ arguments. See more at [`fit`](@ref). is_static(::Any) = false """ - LearnAPI.iteration_parameter(algorithm) + LearnAPI.iteration_parameter(learner) -The name of the iteration parameter of `algorithm`, or `nothing` if the algorithm is not +The name of the iteration parameter of `learner`, or `nothing` if the algorithm is not iterative. # New implementations @@ -380,12 +384,12 @@ iteration_parameter(::Any) = nothing """ - LearnAPI.fit_observation_scitype(algorithm) + LearnAPI.fit_observation_scitype(learner) Return an upper bound `S` on the scitype of individual observations guaranteed to work -when calling `fit`: if `observations = obs(algorithm, data)` and +when calling `fit`: if `observations = obs(learner, data)` and `ScientificTypes.scitype(o) <:S` for each `o` in `observations`, then the call -`fit(algorithm, data)` is supported. +`fit(learner, data)` is supported. $DOC_EXPLAIN_EACHOBS @@ -399,14 +403,14 @@ Optional. The fallback return value is `Union{}`. fit_observation_scitype(::Any) = Union{} """ - LearnAPI.target_observation_scitype(algorithm) + LearnAPI.target_observation_scitype(learner) Return an upper bound `S` on the scitype of each observation of an applicable target variable. Specifically: -- If `:(LearnAPI.target) in LearnAPI.functions(algorithm)` (i.e., `fit` consumes target - variables) then "target" means anything returned by `LearnAPI.target(algorithm, data)`, - where `data` is an admissible argument in the call `fit(algorithm, data)`. +- If `:(LearnAPI.target) in LearnAPI.functions(learner)` (i.e., `fit` consumes target + variables) then "target" means anything returned by `LearnAPI.target(learner, data)`, + where `data` is an admissible argument in the call `fit(learner, data)`. - `S` will always be an upper bound on the scitype of (point) observations that could be conceivably extracted from the output of [`predict`](@ref). @@ -414,7 +418,7 @@ variable. 
Specifically: To illustate the second case, suppose we have ```julia -model = fit(algorithm, data) +model = fit(learner, data) ŷ = predict(model, Sampleable(), data_new) ``` @@ -433,8 +437,8 @@ target_observation_scitype(::Any) = Any # # DERIVED TRAITS -name(algorithm) = split(string(constructor(algorithm)), ".") |> last -is_algorithm(algorithm) = !isempty(functions(algorithm)) -preferred_kind_of_proxy(algorithm) = first(kinds_of_proxy(algorithm)) -target(algorithm) = :(LearnAPI.target) in functions(algorithm) -weights(algorithm) = :(LearnAPI.weights) in functions(algorithm) +name(learner) = split(string(constructor(learner)), ".") |> last +is_learner(learner) = !isempty(functions(learner)) +preferred_kind_of_proxy(learner) = first(kinds_of_proxy(learner)) +target(learner) = :(LearnAPI.target) in functions(learner) +weights(learner) = :(LearnAPI.weights) in functions(learner) diff --git a/src/types.jl b/src/types.jl index be40922f..8a53672d 100644 --- a/src/types.jl +++ b/src/types.jl @@ -53,27 +53,35 @@ expectiles at 50% will provide `Point` instead. """ abstract type IID <: KindOfProxy end -struct Point <: IID end -struct Sampleable <: IID end -struct Distribution <: IID end -struct LogDistribution <: IID end -struct Probability <: IID end -struct LogProbability <: IID end -struct Parametric <: IID end -struct LabelAmbiguous <: IID end -struct LabelAmbiguousSampleable <: IID end -struct LabelAmbiguousDistribution <: IID end -struct LabelAmbiguousFuzzy <: IID end -struct ConfidenceInterval <: IID end -struct Fuzzy <: IID end -struct ProbabilisticFuzzy <: IID end -struct SurvivalFunction <: IID end -struct SurvivalDistribution <: IID end -struct HazardFunction <: IID end -struct OutlierScore <: IID end -struct Continuous <: IID end -struct Quantile <: IID end -struct Expectile <: IID end +const IID_SYMBOLS = [ + :Point, + :Sampleable, + :Distribution, + :LogDistribution, + :Probability, + :LogProbability, + :Parametric, + :LabelAmbiguous, + :LabelAmbiguousSampleable, + :LabelAmbiguousDistribution, + :LabelAmbiguousFuzzy, + :ConfidenceInterval, + :Fuzzy, + :ProbabilisticFuzzy, + :SurvivalFunction, + :SurvivalDistribution, + :HazardFunction, + :OutlierScore, + :Continuous, + :Quantile, + :Expectile, +] + +for S in IID_SYMBOLS + quote + struct $S <: IID end + end |> eval +end """ @@ -92,18 +100,27 @@ space ``Y^n``, where ``Y`` is the space from which the target variable takes its """ abstract type Joint <: KindOfProxy end -struct JointSampleable <: Joint end -struct JointDistribution <: Joint end -struct JointLogDistribution <: Joint end + +const JOINT_SYMBOLS = [ + :JointSampleable, + :JointDistribution, + :JointLogDistribution, +] + +for S in JOINT_SYMBOLS + quote + struct $S <: Joint end + end |> eval +end """ Single <: KindOfProxy -Abstract subtype of [`LearnAPI.KindOfProxy`](@ref). It applies only to algorithms for +Abstract subtype of [`LearnAPI.KindOfProxy`](@ref). It applies only to learners for which `predict` has no data argument, i.e., is of the form `predict(model, kind_of_proxy)`. An example is an algorithm learning a probability distribution from samples, and we regard the samples as drawn from the "target" variable. If in this case, -`kind_of_proxy` is an instance of `LearnAPI.Single` then, `predict(algorithm)` returns a +`kind_of_proxy` is an instance of `LearnAPI.Single` then, `predict(learner)` returns a single object representing a probability distribution. 
| type `T` | form of output of `predict(model, ::T)` | @@ -114,49 +131,54 @@ single object representing a probability distribution. """ abstract type Single <: KindOfProxy end -struct SingleSampeable <: Single end -struct SingleDistribution <: Single end -struct SingleLogDistribution <: Single end - -const CONCRETE_TARGET_PROXY_TYPES = [ - subtypes(IID)..., - subtypes(Single)..., - subtypes(Joint)..., + +const SINGLE_SYMBOLS = [ + :SingleSampeable, + :SingleDistribution, + :SingleLogDistribution, ] -const CONCRETE_TARGET_PROXY_TYPES_SYMBOLS = map(CONCRETE_TARGET_PROXY_TYPES) do T - Symbol(last(split(string(T), '.'))) +for S in SINGLE_SYMBOLS + quote + struct $S <: Single end + end |> eval end -const CONCRETE_TARGET_PROXY_TYPES_LIST = join( - map(CONCRETE_TARGET_PROXY_TYPES_SYMBOLS) do s - "`$s()`" - end, - ", ", - " and ", -) - -const DOC_HOW_TO_LIST_PROXIES = - "The instances of [`LearnAPI.KindOfProxy`](@ref) are: "* - "$(LearnAPI.CONCRETE_TARGET_PROXY_TYPES_LIST). " - +const CONCRETE_TARGET_PROXY_SYMBOLS = [ + IID_SYMBOLS..., + SINGLE_SYMBOLS..., + JOINT_SYMBOLS..., +] """ LearnAPI.KindOfProxy Abstract type whose concrete subtypes `T` each represent a different kind of proxy for -some target variable, associated with some algorithm. Instances `T()` are used to request +some target variable, associated with some learner. Instances `T()` are used to request the form of target predictions in [`predict`](@ref) calls. See LearnAPI.jl documentation for an explanation of "targets" and "target proxies". -For example, `Distribution` is a concrete subtype of `LearnAPI.KindOfProxy` and a call -like `predict(model, Distribution(), Xnew)` returns a data object whose observations are -probability density/mass functions, assuming `algorithm` supports predictions of that -form. +For example, `Distribution` is a concrete subtype of `IID <: LearnAPI.KindOfProxy` and a +call like `predict(model, Distribution(), Xnew)` returns a data object whose observations +are probability density/mass functions, assuming `learner = LearnAPI.learner(model)` +supports predictions of that form, which is true if `Distribution() in` +[`LearnAPI.kinds_of_proxy(learner)`](@ref). + +Proxy types are grouped under three abstract subtypes: + +- [`LearnAPI.IID`](@ref): The main type, for proxies consisting of uncorrelated individual + components, one for each input observation + +- [`LearnAPI.Joint`](@ref): For learners that predict a single probabilistic structure + encapsulating correlations between target predictions for different input observations + +- [`LearnAPI.Single`](@ref): For learners, such as density estimators, that are trained on + a target variable only (no features); `predict` consumes no data and the returned target + proxy is a single probabilistic structure. -$DOC_HOW_TO_LIST_PROXIES +For lists of all concrete instances, refer to documentation for the relevant subtype. """ KindOfProxy @@ -180,15 +202,15 @@ All arrays implement `RandomAccess`, with the last index being the observation i (observations-as-columns in matrices). A Tables.jl compatible table `data` implements `RandomAccess` if `Tables.istable(data)` is -true and if `data` implements `DataAPI.nrows`. This includes many tables, and in +true and if `data` implements `DataAPI.nrow`. This includes many tables, and in particular, `DataFrame`s. Tables that are also tuples are explicitly excluded. Any tuple of objects implementing `RandomAccess` also implements `RandomAccess`. 
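To illustrate the guarantee for tuples, the MLUtils.jl observation API can be applied directly to a matrix/vector pair (a sketch, assuming MLUtils.jl is available):

```julia
import MLUtils

X = rand(3, 10)   # 3 features, 10 observations (observations as columns)
y = rand(10)
data = (X, y)     # a tuple of objects implementing `RandomAccess`

MLUtils.numobs(data)                    # 10
Xsub, ysub = MLUtils.getobs(data, 2:5)  # consistent subsample of both components
```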
-If [`LearnAPI.data_interface(algorithm)`](@ref) takes the value `RandomAccess()`, then
-[`obs`](@ref)`(algorithm, ...)` is guaranteed to return objects implementing the
+If [`LearnAPI.data_interface(learner)`](@ref) takes the value `RandomAccess()`, then
+[`obs`](@ref)`(learner, ...)` is guaranteed to return objects implementing the
 `RandomAccess` interface, and the same holds for `obs(model, ...)`, whenever
-`LearnAPI.algorithm(model) == algorithm`.
+`LearnAPI.learner(model) == learner`.

 # Implementing `RandomAccess` for new data types

@@ -211,10 +233,10 @@ it implements Julia's `iterate` interface, including `Base.length`, and if

 - `data isa MLUtils.DataLoader`, which includes output from `MLUtils.eachobs`.

-If [`LearnAPI.data_interface(algorithm)`](@ref) takes the value `FiniteIterable()`, then
-[`obs`](@ref)`(algorithm, ...)` is guaranteed to return objects implementing the
+If [`LearnAPI.data_interface(learner)`](@ref) takes the value `FiniteIterable()`, then
+[`obs`](@ref)`(learner, ...)` is guaranteed to return objects implementing the
 `FiniteIterable` interface, and the same holds for `obs(model, ...)`, whenever
-`LearnAPI.algorithm(model) == algorithm`.
+`LearnAPI.learner(model) == learner`.

 See also [`LearnAPI.RandomAccess`](@ref), [`LearnAPI.Iterable`](@ref).
 """

@@ -227,10 +249,10 @@ A data interface type. We say that `data` implements the `Iterable` interface if
 implements Julia's basic `iterate` interface. (Such objects may not implement
 `MLUtils.numobs` or `Base.length`.)

-If [`LearnAPI.data_interface(algorithm)`](@ref) takes the value `Iterable()`, then
-[`obs`](@ref)`(algorithm, ...)` is guaranteed to return objects implementing `Iterable`,
-and the same holds for `obs(model, ...)`, whenever `LearnAPI.algorithm(model) ==
-algorithm`.
+If [`LearnAPI.data_interface(learner)`](@ref) takes the value `Iterable()`, then
+[`obs`](@ref)`(learner, ...)` is guaranteed to return objects implementing `Iterable`,
+and the same holds for `obs(model, ...)`, whenever `LearnAPI.learner(model) ==
+learner`.

 See also [`LearnAPI.FiniteIterable`](@ref), [`LearnAPI.RandomAccess`](@ref).

diff --git a/test/patterns/ensembling.jl b/test/patterns/ensembling.jl
index ad348e4a..73b864b8 100644
--- a/test/patterns/ensembling.jl
+++ b/test/patterns/ensembling.jl
@@ -9,10 +9,10 @@ using StableRNGs

 # # ENSEMBLE OF REGRESSORS (A MODEL WRAPPER)

-# We implement a toy algorithm that creates an bagged ensemble of regressors, i.e, where
-# each atomic model is trained on a random sample of the training observations (same
-# number, but sampled with replacement). In particular this algorithm has an iteration
-# parameter `n`, and we implement `update` for warm restarts when `n` increases.
+# We implement a learner that creates a bagged ensemble of regressors, i.e., where each
+# atomic model is trained on a random sample of the training observations (same number,
+# but sampled with replacement). In particular this learner has an iteration parameter
+# `n`, and we implement `update` to execute warm restarts when `n` increases.

 # no docstring here - that goes with the constructor; some fields left abstract for
 # simplicity

@@ -23,9 +23,9 @@ struct Ensemble
     n::Int
 end

-# Since the `atom` hyperparameter is another algorithm, it doesn't need a default in the
-# kwarg constructor, but we do need to overload the `LearnAPI.is_composite` trait (done
-# later).
+# Since the `atom` hyperparameter is another learner, the user must explicitly set it in
+# constructor calls, or an error is thrown.
We also need to overload the +# `LearnAPI.is_composite` trait (done later). """ Ensemble(atom; rng=Random.default_rng(), n=10) @@ -36,33 +36,33 @@ Instantiate a bagged ensemble of `n` regressors, with base regressor `atom`, etc Ensemble(atom; rng=Random.default_rng(), n=10) = Ensemble(atom, rng, n) # `LearnAPI.constructor` defined later -# pure keyword argument constructor: +# need a pure keyword argument constructor: function Ensemble(; atom=nothing, kwargs...) isnothing(atom) && error("You must specify `atom=...` ") Ensemble(atom; kwargs...) end struct EnsembleFitted - algorithm::Ensemble + learner::Ensemble atom::Ridge - rng # mutated copy of `algorithm.rng` + rng # mutated copy of `learner.rng` models # leaving type abstract for simplicity end -LearnAPI.algorithm(model::EnsembleFitted) = model.algorithm +LearnAPI.learner(model::EnsembleFitted) = model.learner # We add the same data interface that the atomic regressor uses: -LearnAPI.obs(algorithm::Ensemble, data) = LearnAPI.obs(algorithm.atom, data) +LearnAPI.obs(learner::Ensemble, data) = LearnAPI.obs(learner.atom, data) LearnAPI.obs(model::EnsembleFitted, data) = LearnAPI.obs(first(model.models), data) -LearnAPI.target(algorithm::Ensemble, data) = LearnAPI.target(algorithm.atom, data) -LearnAPI.features(algorithm::Ensemble, data) = LearnAPI.features(algorithm.atom, data) +LearnAPI.target(learner::Ensemble, data) = LearnAPI.target(learner.atom, data) +LearnAPI.features(learner::Ensemble, data) = LearnAPI.features(learner.atom, data) -function LearnAPI.fit(algorithm::Ensemble, data; verbosity=1) +function LearnAPI.fit(learner::Ensemble, data; verbosity=1) # unpack hyperparameters: - atom = algorithm.atom - rng = deepcopy(algorithm.rng) # to prevent mutation of `algorithm`! - n = algorithm.n + atom = learner.atom + rng = deepcopy(learner.rng) # to prevent mutation of `learner`! + n = learner.n # ensure data can be subsampled using MLUtils.jl, and that we're feeding the atomic # `fit` data in an efficient (pre-processed) form: @@ -87,7 +87,7 @@ function LearnAPI.fit(algorithm::Ensemble, data; verbosity=1) # make some noise, if allowed: verbosity > 0 && @info "Trained $n ridge regression models. " - return EnsembleFitted(algorithm, atom, rng, models) + return EnsembleFitted(learner, atom, rng, models) end @@ -97,16 +97,16 @@ end # models. Otherwise, update is equivalent to retraining from scratch, with the provided # hyperparameter updates. function LearnAPI.update(model::EnsembleFitted, data; verbosity=1, replacements...) - algorithm_old = LearnAPI.algorithm(model) - algorithm = LearnAPI.clone(algorithm_old; replacements...) + learner_old = LearnAPI.learner(model) + learner = LearnAPI.clone(learner_old; replacements...) - :n in keys(replacements) || return fit(algorithm, data) + :n in keys(replacements) || return fit(learner, data) - n = algorithm.n - Δn = n - algorithm_old.n - n < 0 && return fit(model, algorithm) + n = learner.n + Δn = n - learner_old.n + n < 0 && return fit(model, learner) - atom = algorithm.atom + atom = learner.atom observations = obs(atom, data) N = MLUtils.numobs(observations) @@ -125,7 +125,7 @@ function LearnAPI.update(model::EnsembleFitted, data; verbosity=1, replacements. # make some noise, if allowed: verbosity > 0 && @info "Trained $Δn additional ridge regression models. 
" - return EnsembleFitted(algorithm, atom, rng, models) + return EnsembleFitted(learner, atom, rng, models) end LearnAPI.predict(model::EnsembleFitted, ::Point, data) = @@ -134,13 +134,13 @@ LearnAPI.predict(model::EnsembleFitted, ::Point, data) = end LearnAPI.strip(model::EnsembleFitted) = EnsembleFitted( - model.algorithm, + model.learner, model.atom, model.rng, LearnAPI.strip.(Ref(model.atom), models), ) -# note the inclusion of `iteration_parameter`: +# learner traits (note the inclusion of `iteration_parameter`): @trait( Ensemble, constructor = Ensemble, @@ -150,7 +150,7 @@ LearnAPI.strip(model::EnsembleFitted) = EnsembleFitted( tags = ("regression", "ensemble algorithms", "iterative models"), functions = ( :(LearnAPI.fit), - :(LearnAPI.algorithm), + :(LearnAPI.learner), :(LearnAPI.strip), :(LearnAPI.obs), :(LearnAPI.features), @@ -161,10 +161,10 @@ LearnAPI.strip(model::EnsembleFitted) = EnsembleFitted( ) # convenience method: -LearnAPI.fit(algorithm::Ensemble, X, y, extras...; kwargs...) = - fit(algorithm, (X, y, extras...); kwargs...) -LearnAPI.update(algorithm::EnsembleFitted, X, y, extras...; kwargs...) = - update(algorithm, (X, y, extras...); kwargs...) +LearnAPI.fit(learner::Ensemble, X, y, extras...; kwargs...) = + fit(learner, (X, y, extras...); kwargs...) +LearnAPI.update(learner::EnsembleFitted, X, y, extras...; kwargs...) = + update(learner, (X, y, extras...); kwargs...) # synthetic test data: @@ -182,15 +182,15 @@ Xtest = Tables.subset(X, test) @testset "test an implementation of bagged ensemble of ridge regressors" begin rng = StableRNG(123) atom = Ridge() - algorithm = Ensemble(atom; n=4, rng) - @test LearnAPI.clone(algorithm) == algorithm - @test :(LearnAPI.obs) in LearnAPI.functions(algorithm) - @test LearnAPI.target(algorithm, data) == y - @test LearnAPI.features(algorithm, data) == X + learner = Ensemble(atom; n=4, rng) + @test LearnAPI.clone(learner) == learner + @test :(LearnAPI.obs) in LearnAPI.functions(learner) + @test LearnAPI.target(learner, data) == y + @test LearnAPI.features(learner, data) == X model = @test_logs( (:info, r"Trained 4 ridge"), - fit(algorithm, Xtrain, y[train]; verbosity=1), + fit(learner, Xtrain, y[train]; verbosity=1), ); ŷ4 = predict(model, Point(), Xtest) @@ -201,13 +201,13 @@ Xtest = Tables.subset(X, test) ŷ7 = predict(model, Xtest) # compare with cold restart: - model_cold = fit(LearnAPI.clone(algorithm; n=7), Xtrain, y[train]; verbosity=0); + model_cold = fit(LearnAPI.clone(learner; n=7), Xtrain, y[train]; verbosity=0); @test ŷ7 ≈ predict(model_cold, Xtest) # test that we get a cold restart if another hyperparameter is changed: model2 = update(model, Xtrain, y[train]; atom=Ridge(0.05)) - algorithm2 = Ensemble(Ridge(0.05); n=7, rng) - model_cold = fit(algorithm2, Xtrain, y[train]; verbosity=0) + learner2 = Ensemble(Ridge(0.05); n=7, rng) + model_cold = fit(learner2, Xtrain, y[train]; verbosity=0) @test predict(model2, Xtest) ≈ predict(model_cold, Xtest) end diff --git a/test/patterns/gradient_descent.jl b/test/patterns/gradient_descent.jl index 19f0d363..27c9791e 100644 --- a/test/patterns/gradient_descent.jl +++ b/test/patterns/gradient_descent.jl @@ -22,12 +22,12 @@ import ComponentArrays # - `iteration_parameter` # - `training_losses` # - `obs` for pre-processing (non-tabular) classification training data -# - `predict(algorithm, ::Distribution, Xnew)` +# - `predict(learner, ::Distribution, Xnew)` # For simplicity, we use single-observation batches for gradient descent updates, and we -# may dodge some standard optimizations. 
+# may dodge some optimizations.
-# This is also an example of a probability-predicting classifier.
+# This is an example of a probability-predicting classifier.
 # ## Helpers
@@ -38,7 +38,7 @@ import ComponentArrays
 Return Brier (quadratic) loss.
 - `probs`: predicted probability vector
-- `hot`: corresponding ground truth observation, as a one-hot encoded bit vector
+- `hot`: corresponding ground truth observation, as a one-hot encoded `BitVector`
 """
 function brier_loss(probs, hot)
@@ -54,8 +54,8 @@ for the specified number of `epochs`.
 - `perceptron`: component array with components `weights` and `bias`
 - `optimiser`: optimiser from Optimisers.jl
-- `X`: feature matrix, of size (p, n)
-- `y_hot`: one-hot encoded target, of size (nclasses, n)
+- `X`: feature matrix, of size `(p, n)`
+- `y_hot`: one-hot encoded target, of size `(nclasses, n)`
 - `epochs`: number of epochs
 - `state`: optimiser state
@@ -83,7 +83,7 @@ end
 # ## Implementation
-# ### Algorithm
+# ### Learner
 # no docstring here - that goes with the constructor;
 # SOME FIELDS LEFT ABSTRACT FOR SIMPLICITY
@@ -98,7 +98,7 @@ end
 Instantiate a perceptron classifier.
-Train an instance, `algorithm`, by doing `model = fit(algorithm, X, y)`, where
+Train an instance, `learner`, by doing `model = fit(learner, X, y)`, where
 - `X` is a `Float32` matrix, with observations-as-columns
 - `y` (target) is some one-dimensional `CategoricalArray`.
@@ -112,7 +112,7 @@ point predictions with `predict(model, Point(), Xnew)`.
 Return an updated model, with the weights and bias of the previously learned perceptron
 used as the starting state in new gradient descent updates. Adopt any specified
-hyperparameter `replacements` (properties of `LearnAPI.algorithm(model)`).
+hyperparameter `replacements` (properties of `LearnAPI.learner(model)`).
     update(model, newdata; epochs=n, replacements...)
@@ -120,8 +120,8 @@ If `Δepochs = n - perceptron.epochs` is non-negative, then return an updated mo
 the weights and bias of the previously learned perceptron used as the starting state in
 new gradient descent updates for `Δepochs` epochs, and using the provided `newdata`
 instead of the previous training data. Any other hyperparameter `replacements` are also
-adopted. If `Δepochs` is negative or not specified, instead return `fit(algorithm,
-newdata)`, where `algorithm=LearnAPI.clone(algorithm; epochs=n, replacements....)`.
+adopted. If `Δepochs` is negative or not specified, instead return `fit(learner,
+newdata)`, where `learner=LearnAPI.clone(learner; epochs=n, replacements...)`.
 """
 PerceptronClassifier(; epochs=50, optimiser=Optimisers.Adam(), rng=Random.default_rng()) =
@@ -131,9 +131,9 @@ PerceptronClassifier(; epochs=50, optimiser=Optimisers.Adam(), rng=Random.defaul
 # ### Data interface
 # For raw training data:
-LearnAPI.target(algorithm::PerceptronClassifier, data::Tuple) = last(data)
+LearnAPI.target(learner::PerceptronClassifier, data::Tuple) = last(data)
-# For wrapping pre-processed training data (output of `obs(algorithm, data)`):
+# For wrapping pre-processed training data (output of `obs(learner, data)`):
 struct PerceptronClassifierObs
     X::Matrix{Float32}
     y_hot::BitMatrix # one-hot encoded target
 end
 # For pre-processing the training data:
-function LearnAPI.obs(algorithm::PerceptronClassifier, data::Tuple)
+function LearnAPI.obs(learner::PerceptronClassifier, data::Tuple)
     X, y = data
     classes = CategoricalDistributions.classes(y)
     y_hot = classes .== permutedims(y) # one-hot encoding
@@ -157,12 +157,12 @@ Base.getindex(observations, I) = PerceptronClassifierObs(
 )
 LearnAPI.target(
-    algorithm::PerceptronClassifier,
+    learner::PerceptronClassifier,
     observations::PerceptronClassifierObs,
 ) = observations.y
 LearnAPI.features(
-    algorithm::PerceptronClassifier,
+    learner::PerceptronClassifier,
     observations::PerceptronClassifierObs,
 ) = observations.X
@@ -174,26 +174,26 @@ LearnAPI.features(
 # For wrapping outcomes of learning:
 struct PerceptronClassifierFitted
-    algorithm::PerceptronClassifier
+    learner::PerceptronClassifier
     perceptron # component array storing weights and bias
     state # optimiser state
     classes # target classes
     losses
 end
-LearnAPI.algorithm(model::PerceptronClassifierFitted) = model.algorithm
+LearnAPI.learner(model::PerceptronClassifierFitted) = model.learner
-# `fit` for pre-processed data (output of `obs(algorithm, data)`):
+# `fit` for pre-processed data (output of `obs(learner, data)`):
 function LearnAPI.fit(
-    algorithm::PerceptronClassifier,
+    learner::PerceptronClassifier,
     observations::PerceptronClassifierObs;
     verbosity=1,
     )
     # unpack hyperparameters:
-    epochs = algorithm.epochs
-    optimiser = algorithm.optimiser
-    rng = deepcopy(algorithm.rng) # to prevent mutation of `algorithm`!
+    epochs = learner.epochs
+    optimiser = learner.optimiser
+    rng = deepcopy(learner.rng) # to prevent mutation of `learner`!
     # unpack data:
     X = observations.X
@@ -211,12 +211,12 @@ function LearnAPI.fit(
     perceptron, state, losses = corefit(perceptron, X, y_hot, epochs, state, verbosity)
-    return PerceptronClassifierFitted(algorithm, perceptron, state, classes, losses)
+    return PerceptronClassifierFitted(learner, perceptron, state, classes, losses)
 end
 # `fit` for unprocessed data:
-LearnAPI.fit(algorithm::PerceptronClassifier, data; kwargs...) =
-    fit(algorithm, obs(algorithm, data); kwargs...)
+LearnAPI.fit(learner::PerceptronClassifier, data; kwargs...) =
+    fit(learner, obs(learner, data); kwargs...)
 # see the `PerceptronClassifier` docstring for `update_observations` logic.
 function LearnAPI.update_observations(
@@ -234,21 +234,21 @@ function LearnAPI.update_observations(
     classes == model.classes || error("New training target has incompatible classes.")
-    algorithm_old = LearnAPI.algorithm(model)
-    algorithm = LearnAPI.clone(algorithm_old; replacements...)
+    learner_old = LearnAPI.learner(model)
+    learner = LearnAPI.clone(learner_old; replacements...)
     perceptron = model.perceptron
     state = model.state
     losses = model.losses
-    epochs = algorithm.epochs
+    epochs = learner.epochs
     perceptron, state, losses_new = corefit(perceptron, X, y_hot, epochs, state, verbosity)
     losses = vcat(losses, losses_new)
-    return PerceptronClassifierFitted(algorithm, perceptron, state, classes, losses)
+    return PerceptronClassifierFitted(learner, perceptron, state, classes, losses)
 end
 LearnAPI.update_observations(model::PerceptronClassifierFitted, data; kwargs...) =
-    update_observations(model, obs(LearnAPI.algorithm(model), data); kwargs...)
+    update_observations(model, obs(LearnAPI.learner(model), data); kwargs...)
 # see the `PerceptronClassifier` docstring for `update` logic.
 function LearnAPI.update(
@@ -266,25 +266,25 @@ function LearnAPI.update(
     classes == model.classes || error("New training target has incompatible classes.")
-    algorithm_old = LearnAPI.algorithm(model)
-    algorithm = LearnAPI.clone(algorithm_old; replacements...)
-    :epochs in keys(replacements) || return fit(algorithm, observations)
+    learner_old = LearnAPI.learner(model)
+    learner = LearnAPI.clone(learner_old; replacements...)
+    :epochs in keys(replacements) || return fit(learner, observations)
     perceptron = model.perceptron
     state = model.state
     losses = model.losses
-    epochs = algorithm.epochs
-    Δepochs = epochs - algorithm_old.epochs
-    epochs < 0 && return fit(model, algorithm)
+    epochs = learner.epochs
+    Δepochs = epochs - learner_old.epochs
+    Δepochs < 0 && return fit(learner, observations)
     perceptron, state, losses_new = corefit(perceptron, X, y_hot, Δepochs, state, verbosity)
     losses = vcat(losses, losses_new)
-    return PerceptronClassifierFitted(algorithm, perceptron, state, classes, losses)
+    return PerceptronClassifierFitted(learner, perceptron, state, classes, losses)
 end
 LearnAPI.update(model::PerceptronClassifierFitted, data; kwargs...) =
-    update(model, obs(LearnAPI.algorithm(model), data); kwargs...)
+    update(model, obs(LearnAPI.learner(model), data); kwargs...)
 # ### Predict
@@ -315,7 +315,7 @@ LearnAPI.training_losses(model::PerceptronClassifierFitted) = model.losses
     tags = ("classification", "iterative algorithms", "incremental algorithms"),
     functions = (
         :(LearnAPI.fit),
-        :(LearnAPI.algorithm),
+        :(LearnAPI.learner),
         :(LearnAPI.strip),
         :(LearnAPI.obs),
         :(LearnAPI.features),
@@ -330,12 +330,12 @@ LearnAPI.training_losses(model::PerceptronClassifierFitted) = model.losses
 # ### Convenience methods
-LearnAPI.fit(algorithm::PerceptronClassifier, X, y; kwargs...) =
-    fit(algorithm, (X, y); kwargs...)
-LearnAPI.update_observations(algorithm::PerceptronClassifier, X, y; kwargs...) =
-    update_observations(algorithm, (X, y); kwargs...)
-LearnAPI.update(algorithm::PerceptronClassifier, X, y; kwargs...) =
-    update(algorithm, (X, y); kwargs...)
+LearnAPI.fit(learner::PerceptronClassifier, X, y; kwargs...) =
+    fit(learner, (X, y); kwargs...)
+LearnAPI.update_observations(model::PerceptronClassifierFitted, X, y; kwargs...) =
+    update_observations(model, (X, y); kwargs...)
+LearnAPI.update(model::PerceptronClassifierFitted, X, y; kwargs...) =
+    update(model, (X, y); kwargs...)
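Taken together with the docstring above, these convenience methods support a workflow like the following. This is an illustrative sketch only, not part of the test file: `X`, `y`, `Xnew` and `ynew` are placeholders, with `X` a `Float32` matrix having observations as columns and `y` a one-dimensional `CategoricalArray`, and the `Distribution()` proxy is assumed as described in the comments at the top of this file:

```julia
learner = PerceptronClassifier(epochs=40)
model = fit(learner, X, y; verbosity=0)          # same as fit(learner, (X, y); verbosity=0)
ŷ = predict(model, Point(), Xnew)                # point predictions
predict(model, Distribution(), Xnew)             # probabilistic predictions
model = update(model, X, y; epochs=70)           # warm restart: 30 further epochs
model = update_observations(model, Xnew, ynew)   # continue training on new observations
LearnAPI.training_losses(model)                  # accessor: losses from all epochs so far
```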
 # ## Tests
@@ -364,13 +364,13 @@ ytest = y[test];
 @testset "PerceptronClassifier" begin
     rng = StableRNG(123)
-    algorithm = PerceptronClassifier(; optimiser=Optimisers.Adam(0.01), epochs=40, rng)
-    @test LearnAPI.clone(algorithm) == algorithm
-    @test :(LearnAPI.update) in LearnAPI.functions(algorithm)
-    @test LearnAPI.target(algorithm, (X, y)) == y
-    @test LearnAPI.features(algorithm, (X, y)) == X
+    learner = PerceptronClassifier(; optimiser=Optimisers.Adam(0.01), epochs=40, rng)
+    @test LearnAPI.clone(learner) == learner
+    @test :(LearnAPI.update) in LearnAPI.functions(learner)
+    @test LearnAPI.target(learner, (X, y)) == y
+    @test LearnAPI.features(learner, (X, y)) == X
-    model40 = fit(algorithm, Xtrain, ytrain; verbosity=0)
+    model40 = fit(learner, Xtrain, ytrain; verbosity=0)
     # 40 epochs is sufficient for 90% accuracy in this case:
     @test sum(predict(model40, Point(), Xtest) .== ytest)/length(ytest) > 0.9
@@ -385,7 +385,7 @@ ytest = y[test];
     @test !(ŷ70 ≈ ŷ40)
     # compare with cold restart:
-    model = fit(LearnAPI.clone(algorithm; epochs=70), Xtrain, y[train]; verbosity=0);
+    model = fit(LearnAPI.clone(learner; epochs=70), Xtrain, y[train]; verbosity=0);
     @test ŷ70 ≈ predict(model, Xtest)
     # add 30 epochs using `update_observations` instead:
diff --git a/test/patterns/incremental_algorithms.jl b/test/patterns/incremental_algorithms.jl
index ff1a0352..20b01779 100644
--- a/test/patterns/incremental_algorithms.jl
+++ b/test/patterns/incremental_algorithms.jl
@@ -15,7 +15,7 @@ import Distributions
 """
     NormalEstimator()
-Instantiate an algorithm for finding the maximum likelihood normal distribution fitting
+Instantiate a learner for finding the maximum likelihood normal distribution fitting
 some real univariate data `y`. Estimates can be updated with new data.
 ```julia
@@ -46,7 +46,7 @@ struct NormalEstimatorFitted{T}
     n::Int
 end
-LearnAPI.algorithm(::NormalEstimatorFitted) = NormalEstimator()
+LearnAPI.learner(::NormalEstimatorFitted) = NormalEstimator()
 function LearnAPI.fit(::NormalEstimator, y)
     n = length(y)
@@ -94,7 +94,7 @@ LearnAPI.extras(model::NormalEstimatorFitted) = (μ=model.ȳ, σ=sqrt(model.ss/
     human_name = "normal distribution estimator",
     functions = (
         :(LearnAPI.fit),
-        :(LearnAPI.algorithm),
+        :(LearnAPI.learner),
         :(LearnAPI.strip),
         :(LearnAPI.obs),
         :(LearnAPI.features),
@@ -111,8 +111,8 @@ LearnAPI.extras(model::NormalEstimatorFitted) = (μ=model.ȳ, σ=sqrt(model.ss/
     rng = StableRNG(123)
     y = rand(rng, 50);
     ynew = rand(rng, 10);
-    algorithm = NormalEstimator()
-    model = fit(algorithm, y)
+    learner = NormalEstimator()
+    model = fit(learner, y)
     d = predict(model)
     μ, σ = Distributions.params(d)
     @test μ ≈ mean(y)
@@ -122,14 +122,14 @@ LearnAPI.extras(model::NormalEstimatorFitted) = (μ=model.ȳ, σ=sqrt(model.ss/
     @test LearnAPI.extras(model) == (; μ, σ)
     # one-liner:
-    @test predict(algorithm, y) == d
-    @test predict(algorithm, Point(), y) ≈ μ
-    @test predict(algorithm, ConfidenceInterval(), y)[1] ≈ quantile(d, 0.025)
+    @test predict(learner, y) == d
+    @test predict(learner, Point(), y) ≈ μ
+    @test predict(learner, ConfidenceInterval(), y)[1] ≈ quantile(d, 0.025)
     # updating:
     model = update_observations(model, ynew)
     μ2, σ2 = LearnAPI.extras(model)
-    μ3, σ3 = LearnAPI.extras(fit(algorithm, vcat(y, ynew))) # training ab initio
+    μ3, σ3 = LearnAPI.extras(fit(learner, vcat(y, ynew))) # training ab initio
     @test μ2 ≈ μ3
     @test σ2 ≈ σ3
 end
diff --git a/test/patterns/regression.jl b/test/patterns/regression.jl
index 35376519..f7d8d073 100644
--- a/test/patterns/regression.jl
+++ b/test/patterns/regression.jl
@@ -21,7 +21,7 @@ end
 """
     Ridge(; lambda=0.1)
-Instantiate a ridge regression algorithm, with regularization of `lambda`.
+Instantiate a ridge regression learner, with regularization of `lambda`.
 """
 Ridge(; lambda=0.1) = Ridge(lambda) # LearnAPI.constructor defined later
@@ -33,12 +33,12 @@ struct RidgeFitObs{T,M<:AbstractMatrix{T}}
 end
 struct RidgeFitted{T,F}
-    algorithm::Ridge
+    learner::Ridge
     coefficients::Vector{T}
     feature_importances::F
 end
-LearnAPI.algorithm(model::RidgeFitted) = model.algorithm
+LearnAPI.learner(model::RidgeFitted) = model.learner
 Base.getindex(data::RidgeFitObs, I) = RidgeFitObs(data.A[:,I], data.names, data.y[I])
@@ -53,16 +53,16 @@ function LearnAPI.obs(::Ridge, data)
 end
 # for observations:
-function LearnAPI.fit(algorithm::Ridge, observations::RidgeFitObs; verbosity=1)
+function LearnAPI.fit(learner::Ridge, observations::RidgeFitObs; verbosity=1)
     # unpack hyperparameters and data:
-    lambda = algorithm.lambda
+    lambda = learner.lambda
     A = observations.A
     names = observations.names
     y = observations.y
-    # apply core algorithm:
-    coefficients = (A*A' + algorithm.lambda*I)\(A*y) # 1 x p matrix
+    # apply core learner:
+    coefficients = (A*A' + lambda*I)\(A*y) # p-element vector
     # determine crude feature importances:
     feature_importances =
@@ -73,13 +73,13 @@ function LearnAPI.fit(algorithm::Ridge, observations::RidgeFitObs; verbosity=1)
     verbosity > 0 && @info "Features in order of importance: $(first.(feature_importances))"
-    return RidgeFitted(algorithm, coefficients, feature_importances)
+    return RidgeFitted(learner, coefficients, feature_importances)
 end
 # for unprocessed `data = (X, y)`:
-LearnAPI.fit(algorithm::Ridge, data; kwargs...) =
-    fit(algorithm, obs(algorithm, data); kwargs...)
+LearnAPI.fit(learner::Ridge, data; kwargs...) =
+    fit(learner, obs(learner, data); kwargs...)
 # extracting stuff from training data:
 LearnAPI.target(::Ridge, data) = last(data)
@@ -101,7 +101,7 @@ LearnAPI.predict(model::RidgeFitted, ::Point, Xnew) =
 LearnAPI.feature_importances(model::RidgeFitted) = model.feature_importances
 LearnAPI.strip(model::RidgeFitted) =
-    RidgeFitted(model.algorithm, model.coefficients, nothing)
+    RidgeFitted(model.learner, model.coefficients, nothing)
 @trait(
     Ridge,
     constructor = Ridge,
@@ -110,7 +110,7 @@ LearnAPI.strip(model::RidgeFitted) =
     tags = ("regression",),
     functions = (
         :(LearnAPI.fit),
-        :(LearnAPI.algorithm),
+        :(LearnAPI.learner),
         :(LearnAPI.strip),
         :(LearnAPI.obs),
         :(LearnAPI.features),
@@ -121,8 +121,8 @@ LearnAPI.strip(model::RidgeFitted) =
 )
 # convenience method:
-LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) =
-    fit(algorithm, (X, y); kwargs...)
+LearnAPI.fit(learner::Ridge, X, y; kwargs...) =
+    fit(learner, (X, y); kwargs...)
 # ## Tests
@@ -138,17 +138,17 @@ y = 2a - b + 3c + 0.05*rand(n)
 data = (X, y)
 @testset "test an implementation of ridge regression" begin
-    algorithm = Ridge(lambda=0.5)
-    @test :(LearnAPI.obs) in LearnAPI.functions(algorithm)
+    learner = Ridge(lambda=0.5)
+    @test :(LearnAPI.obs) in LearnAPI.functions(learner)
-    @test LearnAPI.target(algorithm, data) == y
-    @test LearnAPI.features(algorithm, data) == X
+    @test LearnAPI.target(learner, data) == y
+    @test LearnAPI.features(learner, data) == X
     # verbose fitting:
     @test_logs(
         (:info, r"Feature"),
         fit(
-            algorithm,
+            learner,
             Tables.subset(X, train),
             y[train];
             verbosity=1,
@@ -158,7 +158,7 @@ data = (X, y)
     # quiet fitting:
     model = @test_logs(
         fit(
-            algorithm,
+            learner,
             Tables.subset(X, train),
             y[train];
             verbosity=0,
@@ -169,12 +169,12 @@ data = (X, y)
     @test ŷ isa Vector{Float64}
     @test predict(model, Tables.subset(X, test)) == ŷ
-    fitobs = LearnAPI.obs(algorithm, data)
+    fitobs = LearnAPI.obs(learner, data)
     predictobs = LearnAPI.obs(model, X)
-    model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0)
-    @test LearnAPI.target(algorithm, fitobs) == y
+    model = fit(learner, MLUtils.getobs(fitobs, train); verbosity=0)
+    @test LearnAPI.target(learner, fitobs) == y
     @test predict(model, Point(), MLUtils.getobs(predictobs, test)) ≈ ŷ
-    @test predict(model, LearnAPI.features(algorithm, fitobs)) ≈ predict(model, X)
+    @test predict(model, LearnAPI.features(learner, fitobs)) ≈ predict(model, X)
     @test LearnAPI.feature_importances(model) isa Vector{<:Pair{Symbol}}
@@ -184,7 +184,7 @@ data = (X, y)
     serialize(filename, small_model)
     recovered_model = deserialize(filename)
-    @test LearnAPI.algorithm(recovered_model) == algorithm
+    @test LearnAPI.learner(recovered_model) == learner
     @test predict(
         recovered_model,
         Point(),
@@ -206,45 +206,45 @@ end
 """
     BabyRidge(; lambda=0.1)
-Instantiate a ridge regression algorithm, with regularization of `lambda`.
+Instantiate a ridge regression learner, with regularization of `lambda`.
 """
 BabyRidge(; lambda=0.1) = BabyRidge(lambda) # LearnAPI.constructor defined later
 struct BabyRidgeFitted{T,F}
-    algorithm::BabyRidge
+    learner::BabyRidge
     coefficients::Vector{T}
     feature_importances::F
 end
-function LearnAPI.fit(algorithm::BabyRidge, data; verbosity=1)
+function LearnAPI.fit(learner::BabyRidge, data; verbosity=1)
     X, y = data
-    lambda = algorithm.lambda
+    lambda = learner.lambda
     table = Tables.columntable(X)
     names = Tables.columnnames(table) |> collect
     A = Tables.matrix(table)'
-    # apply core algorithm:
-    coefficients = (A*A' + algorithm.lambda*I)\(A*y) # vector
+    # apply core learner:
+    coefficients = (A*A' + learner.lambda*I)\(A*y) # vector
     feature_importances = nothing
-    return BabyRidgeFitted(algorithm, coefficients, feature_importances)
+    return BabyRidgeFitted(learner, coefficients, feature_importances)
 end
 # extracting stuff from training data:
 LearnAPI.target(::BabyRidge, data) = last(data)
-LearnAPI.algorithm(model::BabyRidgeFitted) = model.algorithm
+LearnAPI.learner(model::BabyRidgeFitted) = model.learner
 LearnAPI.predict(model::BabyRidgeFitted, ::Point, Xnew) =
     Tables.matrix(Xnew)*model.coefficients
 LearnAPI.strip(model::BabyRidgeFitted) =
-    BabyRidgeFitted(model.algorithm, model.coefficients, nothing)
+    BabyRidgeFitted(model.learner, model.coefficients, nothing)
 @trait(
     BabyRidge,
@@ -253,7 +253,7 @@ LearnAPI.strip(model::BabyRidgeFitted) =
     tags = ("regression",),
     functions = (
         :(LearnAPI.fit),
-        :(LearnAPI.algorithm),
+        :(LearnAPI.learner),
         :(LearnAPI.strip),
         :(LearnAPI.obs),
         :(LearnAPI.features),
@@ -264,27 +264,27 @@ LearnAPI.strip(model::BabyRidgeFitted) =
 )
 # convenience method:
-LearnAPI.fit(algorithm::BabyRidge, X, y; kwargs...) =
-    fit(algorithm, (X, y); kwargs...)
+LearnAPI.fit(learner::BabyRidge, X, y; kwargs...) =
+    fit(learner, (X, y); kwargs...)
 # ## Tests
 @testset "test a variation which does not overload LearnAPI.obs" begin
-    algorithm = BabyRidge(lambda=0.5)
+    learner = BabyRidge(lambda=0.5)
-    model = fit(algorithm, Tables.subset(X, train), y[train]; verbosity=0)
+    model = fit(learner, Tables.subset(X, train), y[train]; verbosity=0)
     ŷ = predict(model, Point(), Tables.subset(X, test))
     @test ŷ isa Vector{Float64}
-    fitobs = obs(algorithm, data)
+    fitobs = obs(learner, data)
     predictobs = LearnAPI.obs(model, X)
-    model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0)
+    model = fit(learner, MLUtils.getobs(fitobs, train); verbosity=0)
     @test predict(model, Point(), MLUtils.getobs(predictobs, test)) == ŷ ==
         predict(model, MLUtils.getobs(predictobs, test))
-    @test LearnAPI.target(algorithm, data) == y
+    @test LearnAPI.target(learner, data) == y
     @test LearnAPI.predict(model, X) ≈
-        LearnAPI.predict(model, LearnAPI.features(algorithm, data))
+        LearnAPI.predict(model, LearnAPI.features(learner, data))
 end
 true
diff --git a/test/patterns/static_algorithms.jl b/test/patterns/static_algorithms.jl
index 5a4c277f..fef3cff1 100644
--- a/test/patterns/static_algorithms.jl
+++ b/test/patterns/static_algorithms.jl
@@ -16,23 +16,23 @@ end
 Selector(; names=Symbol[]) = Selector(names) # LearnAPI.constructor defined later
 # `fit` consumes no observational data, does no "learning", and just returns a thinly
-# wrapped `algorithm` (to distinguish it from the algorithm in dispatch):
-LearnAPI.fit(algorithm::Selector; verbosity=1) = Ref(algorithm)
-LearnAPI.algorithm(model) = model[]
+# wrapped `learner` (to distinguish it from the learner in dispatch):
+LearnAPI.fit(learner::Selector; verbosity=1) = Ref(learner)
+LearnAPI.learner(model) = model[]
 function LearnAPI.transform(model::Base.RefValue{Selector}, X)
-    algorithm = LearnAPI.algorithm(model)
+    learner = LearnAPI.learner(model)
     table = Tables.columntable(X)
     names = Tables.columnnames(table)
-    filtered_names = filter(in(algorithm.names), names)
+    filtered_names = filter(in(learner.names), names)
     filtered_columns = (Tables.getcolumn(table, name) for name in filtered_names)
     filtered_table = NamedTuple{filtered_names}((filtered_columns...,))
     return Tables.materializer(X)(filtered_table)
 end
 # fit and transform in one go:
-function LearnAPI.transform(algorithm::Selector, X)
-    model = fit(algorithm)
+function LearnAPI.transform(learner::Selector, X)
+    model = fit(learner)
     transform(model, X)
 end
@@ -44,7 +44,7 @@ end
     is_static = true,
     functions = (
         :(LearnAPI.fit),
-        :(LearnAPI.algorithm),
+        :(LearnAPI.learner),
         :(LearnAPI.strip),
         :(LearnAPI.obs),
         :(LearnAPI.transform),
     )
@@ -52,14 +52,14 @@ end
 @testset "test a static transformer" begin
-    algorithm = Selector(names=[:x, :w])
+    learner = Selector(names=[:x, :w])
     X = DataFrames.DataFrame(rand(3, 4), [:x, :y, :z, :w])
-    model = fit(algorithm) # no data arguments!
+    model = fit(learner) # no data arguments!
     # if provided, data is ignored:
-    @test LearnAPI.algorithm(model) == algorithm
+    @test LearnAPI.learner(model) == learner
     W = transform(model, X)
     @test W == DataFrames.DataFrame(Tables.matrix(X)[:,[1,4]], [:x, :w])
-    @test W == transform(algorithm, X)
+    @test W == transform(learner, X)
 end
@@ -74,21 +74,21 @@ end
 FancySelector(; names=Symbol[]) = FancySelector(names) # LearnAPI.constructor defined later
 mutable struct FancySelectorFitted
-    algorithm::FancySelector
+    learner::FancySelector
     rejected::Vector{Symbol}
-    FancySelectorFitted(algorithm) = new(algorithm)
+    FancySelectorFitted(learner) = new(learner)
 end
-LearnAPI.algorithm(model::FancySelectorFitted) = model.algorithm
+LearnAPI.learner(model::FancySelectorFitted) = model.learner
 rejected(model::FancySelectorFitted) = model.rejected
-# Here we are wrapping `algorithm` with a place-holder for the `rejected` feature names.
-LearnAPI.fit(algorithm::FancySelector; verbosity=1) = FancySelectorFitted(algorithm)
+# Here we are wrapping `learner` with a place-holder for the `rejected` feature names.
+LearnAPI.fit(learner::FancySelector; verbosity=1) = FancySelectorFitted(learner)
 # output the filtered table and add `rejected` field to model (the model is mutated!)
 function LearnAPI.transform(model::FancySelectorFitted, X)
     table = Tables.columntable(X)
     names = Tables.columnnames(table)
-    keep = LearnAPI.algorithm(model).names
+    keep = LearnAPI.learner(model).names
     filtered_names = filter(in(keep), names)
     model.rejected = setdiff(names, filtered_names)
     filtered_columns = (Tables.getcolumn(table, name) for name in filtered_names)
@@ -97,8 +97,8 @@ function LearnAPI.transform(model::FancySelectorFitted, X)
 end
 # fit and transform in one step:
-function LearnAPI.transform(algorithm::FancySelector, X)
-    model = fit(algorithm)
+function LearnAPI.transform(learner::FancySelector, X)
+    model = fit(learner)
     transform(model, X)
 end
@@ -110,7 +110,7 @@ end
     tags = ("feature engineering",),
     functions = (
         :(LearnAPI.fit),
-        :(LearnAPI.algorithm),
+        :(LearnAPI.learner),
         :(LearnAPI.strip),
         :(LearnAPI.obs),
         :(LearnAPI.transform),
     )
@@ -119,14 +119,14 @@ end
 @testset "test a variation that reports byproducts" begin
-    algorithm = FancySelector(names=[:x, :w])
+    learner = FancySelector(names=[:x, :w])
     X = DataFrames.DataFrame(rand(3, 4), [:x, :y, :z, :w])
-    model = fit(algorithm) # no data arguments!
+    model = fit(learner) # no data arguments!
     @test !isdefined(model, :rejected)
-    @test LearnAPI.algorithm(model) == algorithm
+    @test LearnAPI.learner(model) == learner
     filtered = DataFrames.DataFrame(Tables.matrix(X)[:,[1,4]], [:x, :w])
     @test transform(model, X) == filtered
-    @test transform(algorithm, X) == filtered
+    @test transform(learner, X) == filtered
     @test rejected(model) == [:y, :z]
 end
diff --git a/test/traits.jl b/test/traits.jl
index e6eaae45..b75ed658 100644
--- a/test/traits.jl
+++ b/test/traits.jl
@@ -1,18 +1,18 @@
 using Test
 using LearnAPI
-# A MINIMUM IMPLEMENTATION OF AN ALGORITHM
+# A MINIMUM IMPLEMENTATION OF A LEARNER
 # does nothing useful
-struct SmallAlgorithm end
-LearnAPI.fit(algorithm::SmallAlgorithm, data; verbosity=1) = algorithm
-LearnAPI.algorithm(model::SmallAlgorithm) = model
+struct SmallLearner end
+LearnAPI.fit(learner::SmallLearner, data; verbosity=1) = learner
+LearnAPI.learner(model::SmallLearner) = model
 @trait(
-    SmallAlgorithm,
-    constructor = SmallAlgorithm,
+    SmallLearner,
+    constructor = SmallLearner,
     functions = (
         :(LearnAPI.fit),
-        :(LearnAPI.algorithm),
+        :(LearnAPI.learner),
         :(LearnAPI.strip),
         :(LearnAPI.obs),
         :(LearnAPI.features),
@@ -23,14 +23,14 @@ LearnAPI.algorithm(model::SmallAlgorithm) = model
 # ZERO ARGUMENT METHODS
 @test :(LearnAPI.fit) in LearnAPI.functions()
-@test Point in LearnAPI.kinds_of_proxy()
+@test Point() in LearnAPI.kinds_of_proxy()
 @test "regression" in LearnAPI.tags()
 # OVERLOADABLE TRAITS
-small = SmallAlgorithm()
-@test LearnAPI.constructor(small) == SmallAlgorithm
-@test :(LearnAPI.algorithm) in LearnAPI.functions(small)
+small = SmallLearner()
+@test LearnAPI.constructor(small) == SmallLearner
+@test :(LearnAPI.learner) in LearnAPI.functions(small)
 @test isempty(LearnAPI.kinds_of_proxy(small))
 @test isempty(LearnAPI.tags(small))
 @test !LearnAPI.is_pure_julia(small)
@@ -39,7 +39,7 @@ small = SmallAlgorithm()
 @test LearnAPI.doc_url(small) == "unknown"
 @test LearnAPI.load_path(small) == "unknown"
 @test !LearnAPI.is_composite(small)
-@test LearnAPI.human_name(small) == "small algorithm"
+@test LearnAPI.human_name(small) == "small learner"
 @test isnothing(LearnAPI.iteration_parameter(small))
 @test LearnAPI.data_interface(small) == LearnAPI.RandomAccess()
 @test !(6 isa LearnAPI.fit_observation_scitype(small))
@@ -48,9 +48,11 @@ small = SmallAlgorithm()
 # DERIVED TRAITS
-@test LearnAPI.is_algorithm(small)
+@trait SmallLearner kinds_of_proxy=(Point(),)
+@test LearnAPI.is_learner(small)
 @test !LearnAPI.target(small)
 @test !LearnAPI.weights(small)
+@test LearnAPI.preferred_kind_of_proxy(small) == Point()
 module FruitSalad
 import LearnAPI