2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "LearnAPI"
uuid = "92ad9a40-7767-427a-9ee6-6e577f1266cb"
authors = ["Anthony D. Blaom <[email protected]>"]
version = "0.2.0"
version = "1.0.0"

[compat]
julia = "1.10"
61 changes: 37 additions & 24 deletions README.md
@@ -6,45 +6,58 @@ A base Julia interface for machine learning and statistics
[![Build Status](https://github.com/JuliaAI/LearnAPI.jl/workflows/CI/badge.svg)](https://github.com/JuliaAI/LearnAPI.jl/actions)
[![codecov](https://codecov.io/gh/JuliaAI/LearnAPI.jl/graph/badge.svg?token=9IWT9KYINZ)](https://codecov.io/gh/JuliaAI/LearnAPI.jl?branch=dev)
[![Docs](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliaai.github.io/LearnAPI.jl/dev/)

Comprehensive documentation is [here](https://juliaai.github.io/LearnAPI.jl/dev/).
[![Docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliaai.github.io/LearnAPI.jl/stable/)

New contributions welcome. See the [road map](ROADMAP.md).

## Code snippet
## Synopsis

Configure a machine learning algorithm:
LearnAPI.jl provides for variations and elaborations on the following basic pattern in machine
learning and statistics:

```julia
julia> ridge = Ridge(lambda=0.1)
model = fit(learner, data)
predict(model, newdata)
```

Inspect available functionality:
Here `learner` specifies the configuration of the algorithm (the hyperparameters) while
`model` stores learned parameters and any byproducts of algorithm execution.
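For example, with a hypothetical `Ridge` learner (the names `Ridge`, `X`, `y`, and `Xnew` are illustrative and not exported by LearnAPI.jl):

```julia
using LearnAPI
learner = Ridge(lambda=0.1)   # hyperparameters frozen in the learner
model = fit(learner, (X, y))  # learned parameters live in the model
ŷ = predict(model, Xnew)      # apply the model to new features
```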

```
julia> @functions ridge
(fit, LearnAPI.learner, LearnAPI.strip, obs, LearnAPI.features, LearnAPI.target, predict, LearnAPI.coefficients)
```
LearnAPI.jl is mostly method stubs and lots of documentation. It does not provide
meta-algorithms, such as cross-validation or hyperparameter optimization, but does aim to
support such algorithms.
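To give the flavor, a naive holdout evaluation can be layered on top of nothing but the `fit`/`predict` contract (a sketch; the learner, a row-indexed feature matrix `X`, and targets `y` are assumptions, not part of LearnAPI.jl):

```julia
using LearnAPI, Statistics

function holdout_rmse(learner, X, y; fraction_train=0.7)
    ntrain = floor(Int, fraction_train*length(y))
    train, test = 1:ntrain, ntrain+1:length(y)
    model = fit(learner, (X[train, :], y[train]))   # train on the first fraction
    ŷ = predict(model, X[test, :])                  # predict on the holdout set
    sqrt(mean(abs2, ŷ .- y[test]))                  # root mean squared error
end
```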

Train:
## Related packages

```julia
julia> model = fit(ridge, data)
```
- [MLCore.jl](https://github.com/JuliaML/MLCore.jl): The default sub-sampling API (`getobs`/`numobs`) for LearnAPI.jl implementations, which supports tables and arrays.

Predict:
- [LearnTestAPI.jl](https://github.com/JuliaAI/LearnTestAPI.jl): Package to test implementations of LearnAPI.jl (but documented here)

```julia
julia> predict(model, newdata)[1]
"virginica"
```
- [LearnDataFrontEnds.jl](https://github.com/JuliaAI/LearnDataFrontEnds.jl): For including flexible, user-friendly data front ends in LearnAPI.jl implementations ([docs](https://juliaai.github.io/LearnDataFrontEnds.jl/stable/))

Predict a probability distribution ([proxy](https://juliaai.github.io/LearnAPI.jl/dev/kinds_of_target_proxy/#proxy_types) for the target):
- [StatisticalMeasures.jl](https://github.com/JuliaAI/StatisticalMeasures.jl): Package providing metrics, compatible with LearnAPI.jl

### Selected packages providing alternative APIs

The following packages, listed alphabetically, provide public base APIs. Some provide
additional functionality. PRs to add missing items are welcome.

- [AutoMLPipeline.jl](https://github.com/IBM/AutoMLPipeline.jl)

- [BetaML.jl](https://github.com/sylvaticus/BetaML.jl)

- [FastAI.jl](https://github.com/FluxML/FastAI.jl) (focused on deep learning)

- [LearnBase.jl](https://github.com/JuliaML/LearnBase.jl) (now archived but of historical interest)

- [MLJModelInterface.jl](https://github.com/JuliaAI/MLJModelInterface.jl)

- [MLUtils.jl](https://github.com/JuliaML/MLUtils.jl) (more than a base API, focused on deep learning)

- [ScikitLearn.jl](https://github.com/cstjean/ScikitLearn.jl) (an API in addition to being a wrapper for [scikit-learn](https://scikit-learn.org/stable/))

- [StatsAPI.jl](https://github.com/JuliaStats/StatsAPI.jl/blob/main/src/regressionmodel.jl) (specialized to the needs of traditional statistical models)

```julia
julia> predict(model, Distribution(), newdata)[1]
UnivariateFinite{Multiclass{3}}(setosa=>0.0, versicolor=>0.25, virginica=>0.75)
```

## Credits

3 changes: 2 additions & 1 deletion docs/Project.toml
@@ -2,7 +2,8 @@
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DocumenterInterLinks = "d12716ef-a0f6-4df4-a9f1-a5a34e75c656"
LearnAPI = "92ad9a40-7767-427a-9ee6-6e577f1266cb"
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
LearnTestAPI = "3111ed91-c4f2-40e7-bb19-7f6c618409b8"
MLCore = "c2834f40-e789-41da-a90e-33b280584a8c"
ScientificTypesBase = "30f210dd-8aff-4c5f-94ba-8e64358c1161"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

4 changes: 3 additions & 1 deletion docs/make.jl
@@ -2,11 +2,12 @@ using Documenter
using LearnAPI
using ScientificTypesBase
using DocumenterInterLinks
using LearnTestAPI

const REPO = Remotes.GitHub("JuliaAI", "LearnAPI.jl")

makedocs(
modules=[LearnAPI,],
modules=[LearnAPI, LearnTestAPI],
format=Documenter.HTML(
prettyurls = true,#get(ENV, "CI", nothing) == "true",
collapselevel = 1,
@@ -16,6 +17,7 @@ makedocs(
"Anatomy of an Implementation" => "anatomy_of_an_implementation.md",
"Reference" => [
"Overview" => "reference.md",
"Public Names" => "list_of_public_names.md",
"fit/update" => "fit_update.md",
"predict/transform" => "predict_transform.md",
"Kinds of Target Proxy" => "kinds_of_target_proxy.md",
28 changes: 14 additions & 14 deletions docs/src/anatomy_of_an_implementation.md
@@ -105,7 +105,7 @@ nothing # hide
```

Note that we also include `learner` in the struct, for it must be possible to recover
`learner` from the output of `fit`; see [Accessor functions](@ref) below.
`learner` from the output of `fit`; see [Accessor functions](@ref af) below.

The implementation of `fit` looks like this:
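(The body of that definition is collapsed in this diff view. Purely for orientation, here is a sketch consistent with the ridge examples elsewhere in these docs — the collapsed code may differ in detail, and `LinearAlgebra` and `Tables` imports are assumed:)

```julia
function LearnAPI.fit(learner::Ridge, data; verbosity=1)
    X, y = data                                      # feature table and target vector
    A = Tables.matrix(X)'                            # p × n matrix, observations as columns
    names = collect(Tables.columnnames(Tables.columns(X)))
    coefficients = (A*A' + learner.lambda*I)\(A*y)   # regularized least squares
    named_coefficients = [names[j] => coefficients[j] for j in eachindex(names)]
    verbosity > 0 && @info "Coefficients: $named_coefficients"
    return RidgeFitted(learner, coefficients, named_coefficients)
end
```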

@@ -159,7 +159,7 @@ first element of the tuple returned by [`LearnAPI.kinds_of_proxy(learner)`](@ref
we overload appropriately below.


### Accessor functions
### [Accessor functions](@id af)

An [accessor function](@ref accessor_functions) has the output of [`fit`](@ref) as its
sole argument. Every new implementation must implement the accessor function
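For example, the ridge examples in these docs define:

```julia
LearnAPI.learner(model::RidgeFitted) = model.learner
LearnAPI.coefficients(model::RidgeFitted) = model.named_coefficients
```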
@@ -334,7 +334,7 @@ assumptions about data from those made above.

- If the `data` object consumed by `fit`, `predict`, or `transform` is not a suitable
table¹, array³, tuple of tables and arrays, or some other object implementing the
[MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs`/`numobs` interface,
[MLCore.jl](https://juliaml.github.io/MLCore.jl/dev/) `getobs`/`numobs` interface,
then an implementation must: (i) overload [`obs`](@ref) to articulate how provided data
can be transformed into a form that does support this interface, as illustrated below
under [Providing a separate data front end](@ref); or (ii) overload the trait
@@ -419,7 +419,7 @@ The [`obs`](@ref) methods exist to:
how it works.

In the typical case, where [`LearnAPI.data_interface`](@ref) is not overloaded, the
alternative data representations must implement the MLUtils.jl `getobs/numobs` interface
alternative data representations must implement the MLCore.jl `getobs/numobs` interface
for observation subsampling, which is generally all a user or meta-algorithm will need,
before passing the data on to `fit`/`predict`, as you would the original data.

@@ -436,14 +436,14 @@ one enables the following alternative:
observations = obs(learner, data) # preprocessed training data

# optional subsampling:
observations = MLUtils.getobs(observations, train_indices)
observations = MLCore.getobs(observations, train_indices)

model = fit(learner, observations)

newobservations = obs(model, newdata)

# optional subsampling:
newobservations = MLUtils.getobs(newobservations, test_indices)
newobservations = MLCore.getobs(newobservations, test_indices)

predict(model, newobservations)
```
@@ -555,8 +555,8 @@ above. Here we must explicitly overload them, so that they also handle the output

```@example anatomy2
LearnAPI.features(::Ridge, observations::RidgeFitObs) = observations.A
LearnAPI.target(::Ridge, observations::RidgeFitObs) = observations.y
LearnAPI.features(learner::Ridge, data) = LearnAPI.features(learner, obs(learner, data))
LearnAPI.target(::Ridge, observations::RidgeFitObs) = observations.y
LearnAPI.target(learner::Ridge, data) = LearnAPI.target(learner, obs(learner, data))
```

@@ -568,15 +568,15 @@ LearnAPI.target(learner::Ridge, data) = LearnAPI.target(learner, obs(learner, data))
are generally different.

- We need the adjoint operator, `'`, because the last dimension in arrays is the
observation dimension, according to the MLUtils.jl convention. Remember, `Xnew` is a
observation dimension, according to the MLCore.jl convention. Remember, `Xnew` is a
table here.
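A quick illustration of that convention (assuming MLCore is available):

```julia
import MLCore
A = rand(3, 100)          # 3 features × 100 observations
MLCore.numobs(A)          # 100: the last dimension indexes observations
MLCore.getobs(A, 1:10)    # 3×10 matrix containing the first 10 observations
```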

Since LearnAPI.jl provides fallbacks for `obs` that simply return the unadulterated data
argument, overloading `obs` is optional. This is the case provided data in publicized
`fit`/`predict` signatures already consists only of objects implementing the
[`LearnAPI.RandomAccess`](@ref) interface (most tables¹, arrays³, and tuples thereof).
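In other words, for a learner relying on the fallback, the following calls are equivalent:

```julia
fit(learner, data)
fit(learner, obs(learner, data))  # `obs` falls back to returning `data` itself
```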

To opt out of supporting the MLUtils.jl interface altogether, an implementation must
To opt out of supporting the MLCore.jl interface altogether, an implementation must
overload the trait, [`LearnAPI.data_interface(learner)`](@ref). See [Data
interfaces](@ref data_interfaces) for details.

Expand All @@ -593,15 +593,15 @@ LearnAPI.fit(learner::Ridge, X, y; kwargs...) = fit(learner, (X, y); kwargs...)
## [Demonstration of an advanced `obs` workflow](@id advanced_demo)

We now can train and predict using internal data representations, resampled using the
generic MLUtils.jl interface:
generic MLCore.jl interface:

```@example anatomy2
import MLUtils
import MLCore
learner = Ridge()
observations_for_fit = obs(learner, (X, y))
model = fit(learner, MLUtils.getobs(observations_for_fit, train))
model = fit(learner, MLCore.getobs(observations_for_fit, train))
observations_for_predict = obs(model, X)
ẑ = predict(model, MLUtils.getobs(observations_for_predict, test))
ẑ = predict(model, MLCore.getobs(observations_for_predict, test))
```

```julia
@@ -616,7 +616,7 @@ obs_workflows).
¹ In LearnAPI.jl a *table* is any object `X` implementing the
[Tables.jl](https://tables.juliadata.org/dev/) interface, additionally satisfying
`Tables.istable(X) == true` and implementing `DataAPI.nrow` (and whence
`MLUtils.numobs`). Tables that are also (unnamed) tuples are disallowed.
`MLCore.numobs`). Tables that are also (unnamed) tuples are disallowed.

² An implementation can provide further accessor functions, if necessary, but
like the native ones, they must be included in the [`LearnAPI.functions`](@ref)
93 changes: 91 additions & 2 deletions docs/src/examples.md
@@ -4,7 +4,8 @@ Below is the complete source code for the ridge implementations described in the
[Anatomy of an Implementation](@ref).

- [Basic implementation](@ref)
- [Implementation with data front end](@ref)
- [Implementation with a data front end](@ref)
- [Implementation with a canned data front end](@ref)


## Basic implementation
@@ -85,7 +86,7 @@ LearnAPI.strip(model::RidgeFitted) =
LearnAPI.fit(learner::Ridge, X, y; kwargs...) = fit(learner, (X, y); kwargs...)
```

# Implementation with data front end
# Implementation with a data front end

```julia
using LearnAPI
@@ -190,3 +191,91 @@ LearnAPI.strip(model::RidgeFitted) =
)

```

# Implementation with a canned data front end

The following implements the `Saffron` data front end from
[LearnDataFrontEnds.jl](https://juliaai.github.io/LearnDataFrontEnds.jl/stable/), which
allows for a greater variety of forms of input to `fit` and `predict`. Refer to that
package's [documentation](https://juliaai.github.io/LearnDataFrontEnds.jl/stable/) for details.

```julia
using LearnAPI
import LearnDataFrontEnds as FrontEnds
using LinearAlgebra, Tables

struct Ridge{T<:Real}
lambda::T
end

Ridge(; lambda=0.1) = Ridge(lambda)

# struct for output of `fit`:
struct RidgeFitted{T,F}
learner::Ridge
coefficients::Vector{T}
named_coefficients::F
end

frontend = FrontEnds.Saffron()

# these will return objects of type `FrontEnds.Obs`:
LearnAPI.obs(learner::Ridge, data) = FrontEnds.fitobs(learner, data, frontend)
LearnAPI.obs(model::RidgeFitted, data) = obs(model, data, frontend)

function LearnAPI.fit(learner::Ridge, observations::FrontEnds.Obs; verbosity=1)

lambda = learner.lambda

A = observations.features
names = observations.names
y = observations.target

# apply core learner:
coefficients = (A*A' + lambda*I)\(A*y) # p-element coefficient vector

# determine named coefficients:
named_coefficients = [names[j] => coefficients[j] for j in eachindex(names)]

# make some noise, if allowed:
verbosity > 0 && @info "Coefficients: $named_coefficients"

return RidgeFitted(learner, coefficients, named_coefficients)

end
LearnAPI.fit(learner::Ridge, data; kwargs...) =
fit(learner, obs(learner, data); kwargs...)

LearnAPI.predict(model::RidgeFitted, ::Point, observations::FrontEnds.Obs) =
(observations.features)'*model.coefficients
LearnAPI.predict(model::RidgeFitted, ::Point, Xnew) =
predict(model, Point(), obs(model, Xnew))

# training data deconstructors:
LearnAPI.features(learner::Ridge, data) = LearnAPI.features(learner, data, frontend)
LearnAPI.target(learner::Ridge, data) = LearnAPI.target(learner, data, frontend)

# accessor functions:
LearnAPI.learner(model::RidgeFitted) = model.learner
LearnAPI.coefficients(model::RidgeFitted) = model.named_coefficients
LearnAPI.strip(model::RidgeFitted) =
RidgeFitted(model.learner, model.coefficients, nothing)

@trait(
Ridge,
constructor = Ridge,
kinds_of_proxy=(Point(),),
tags = ("regression",),
functions = (
:(LearnAPI.fit),
:(LearnAPI.learner),
:(LearnAPI.clone),
:(LearnAPI.strip),
:(LearnAPI.obs),
:(LearnAPI.features),
:(LearnAPI.target),
:(LearnAPI.predict),
:(LearnAPI.coefficients),
)
)
```
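A hypothetical usage sketch (`X` and `Xnew` are assumed to be Tables.jl-compatible tables and `y` an `AbstractVector`; none are defined above):

```julia
learner = Ridge(lambda=0.5)
model = fit(learner, (X, y))    # the front end converts the table to a matrix
ŷ = predict(model, Xnew)        # `Point()` predictions, the declared default proxy
LearnAPI.coefficients(model)    # named coefficients, via the accessor function
```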
6 changes: 3 additions & 3 deletions docs/src/fit_update.md
@@ -15,9 +15,9 @@ clustering algorithms); there is no training data and heavy lifting is carried out
### Updating

```
update(model, data; verbosity=..., param1=new_value1, param2=new_value2, ...) -> updated_model
update_observations(model, new_data; verbosity=..., param1=new_value1, ...) -> updated_model
update_features(model, new_data; verbosity=..., param1=new_value1, ...) -> updated_model
update(model, data, :param1 => new_value1, :param2 => new_value2, ...; verbosity=...) -> updated_model
update_observations(model, new_data, :param1 => new_value1, ...; verbosity=...) -> updated_model
update_features(model, new_data, :param1 => new_value1, ...; verbosity=...) -> updated_model
```
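For instance, a hypothetical iterative learner exposing an `n_iterations` hyperparameter might be updated like this (a sketch, not taken verbatim from the documentation):

```julia
model = fit(learner, data)                          # say, with n_iterations=100
model = update(model, data, :n_iterations => 150)   # may warm-start instead of retraining
```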

## Typical workflows