make stack a double stack

ablaom · ablaom · commit 3eb1e8eecd01 · 2022-06-10T15:42:44.000+12:00
diff --git a/Project.toml b/Project.toml
@@ -5,6 +5,7 @@ version = "0.1.0"
 
 [deps]
 MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
+MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
 MLJTuning = "03970b2e-30c4-11ea-3135-d1576263f10f"
 NearestNeighborModels = "636a865e-7cf4-491e-846c-de09b730eb36"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
diff --git a/examples/bigtest/Manifest.toml b/examples/bigtest/Manifest.toml
@@ -851,9 +851,7 @@ version = "0.1.4"
 
 [[deps.MLJTestIntegration]]
 deps = ["MLJ", "MLJTuning", "NearestNeighborModels", "Pkg", "Test"]
-git-tree-sha1 = "5c7a7ab6746c897e1904468cf0f9ef460ae1876d"
-repo-rev = "multi-threading"
-repo-url = "https://github.com/JuliaAI/MLJTestIntegration.jl"
+path = "/Users/anthony/MLJ/MLJTestIntegration"
 uuid = "697918b4-fdc1-4f9e-8ff9-929724cee270"
 version = "0.1.0"
 
diff --git a/src/MLJTestIntegration.jl b/src/MLJTestIntegration.jl
@@ -1,6 +1,6 @@
 module MLJTestIntegration
 
-const N_MODELS_FOR_REPEATABILITY_TEST = 50
+const N_MODELS_FOR_REPEATABILITY_TEST = 20
 
 using MLJ
 using Pkg
diff --git a/src/attemptors.jl b/src/attemptors.jl
@@ -131,11 +131,11 @@ function evaluation(measure, model, resources, data...; throw=false, verbosity=1
     message = L > 1 ? "[:accelerated_evaluation] " : "[evaluation] "
     message *=  "Evaluating model performance using $L different resources. "
     attempt(finalize(message, verbosity); throw) do
-        es = map(resources) do accel
+        es = map(resources) do resource
             evaluate(model, data...;
                      measure=measure,
                      resampling=Holdout(),
-                     acceleration=accel,
+                     acceleration=resource,
                      verbosity=0)
         end
         ms = map(e->sort(e.per_fold[1]), es)
@@ -156,7 +156,7 @@ function tuned_pipe_evaluation(
     attempt(finalize(message, verbosity); throw) do
         pipe = identity |> model
         tuned_pipe = TunedModel(
-            models=[pipe,],
+            models=fill(pipe, 3),
             measure=measure,
         )
         evaluate(
@@ -194,6 +194,31 @@ function iteration_prediction(measure, model, data...; throw=false, verbosity=1)
     end
 end
 
+function _stack(model, resource, isregressor)  # `Stack` having `model` as a base model
+    if isregressor
+        models = (knn1=KNNRegressor(K=4),
+                  knn2=KNNRegressor(K=6),
+                  tmodel=model)  # `model` is the third base model
+        metalearner = KNNRegressor()
+    else
+        models = (knn1=KNNClassifier(K=4),
+                  knn2=KNNClassifier(K=6),
+                  tmodel=model)  # `model` is the third base model
+        metalearner = KNNClassifier()
+    end
+    Stack(;
+        metalearner,
+        resampling=CV(;nfolds=2),
+        acceleration=resource,
+        models...  # base models are splatted in as keyword arguments
+    )
+end
+
+# return a stack whose `tmodel` base model is itself a stack built on
+# `model`; both layers are constructed with `acceleration=resource`:
+_double_stack(model, resource, isregressor) =
+    _stack(_stack(model, resource, isregressor), resource, isregressor)
+
 # the `model` can only be single-target deterministic regressor or
 # probabilistic classifier.
 function stack_evaluation(
@@ -205,32 +230,17 @@ function stack_evaluation(
 )
     L = length(resources)
     message = L > 1 ? "[:accelerated_stack_evaluation] " : "[stack_evaluation] "
-    message *=  "Evaluating a stack containing model "*
-        "with $L different resources. "
+    message *=  "Evaluating a nested stack containing model "*
+        "using $L different resources. "
     target_scitype = MLJ.target_scitype(model)
-    if  AbstractVector{Continuous} <: target_scitype
-        models = (knn1=KNNRegressor(K=4),
-                  knn2=KNNRegressor(K=6),
-                  model=model)
-        metalearner = KNNRegressor()
-        measure = LPLoss(2)
-    else
-        models = (knn1=KNNClassifier(K=4),
-                  knn2=KNNClassifier(K=6),
-                  model=model)
-        metalearner = KNNClassifier()
-        measure = BrierScore()
-    end
-    attempt(finalize(message, verbosity); throw) do
-        es = map(resources) do accel
-            mystack = Stack(
-                ; metalearner,
-                resampling=CV(;nfolds=3),
-                acceleration=accel,
-                models...)
+    isregressor = AbstractVector{Continuous} <: target_scitype
+    measure = isregressor ? LPLoss(2) : BrierScore()
 
+    attempt(finalize(message, verbosity); throw) do
+        es = map(resources) do resource
+            stack = _double_stack(model, resource, isregressor)  # stack within a stack
             evaluate(
-                mystack,
+                stack,
                 data...;
                 measure=measure,
                 resampling=Holdout(),
@@ -239,7 +249,6 @@ function stack_evaluation(
         end |> collect
         ms = map(e->sort(e.per_fold[1]), es)
         m = first(ms)
-#        @show ms
         @assert all(≈(m), ms[2:end]) ERR_INCONSISTENT_RESULTS
         first(es)
     end
diff --git a/src/test.jl b/src/test.jl
@@ -147,15 +147,17 @@ These additional tests are applied to `Supervised` models:
 - `:iteration_prediction`: If the model is iterable, repeat the
   `:evaluation` test but first wrap as an `IteratedModel`.
 
-- `:stack_evaluation`: make the model one of three base models in a
-  `Stack`, and evaluate the `Stack`. 
+- `:stack_evaluation`: evaluate a `Stack` nested within a `Stack`, the
+  model being tested serving as a base model of the inner `Stack`.
+  (The other base models, and both metalearners, are instances of
+  `KNNClassifier` or `KNNRegressor`.)
+  This test is only applied to single target supervised models that
+  are probabilistic classifiers or deterministic regressors.
 
 - `:accelerated_stack_evaluation`: If the model appears to make
   repeatable predictions on retraining, check consistency of
   evaluations for `Stack(acceleration=CPU1(), ...)` and
-  `Stack(acceleration=CPUThreads(), ...)`. This test is only applied
-  to single target supervised models that are probabilistic
-  classifiers or deterministic regressors.
+  `Stack(acceleration=CPUThreads(), ...)` (in the double stack above).
 
 """
 function test(model_proxies, data...; mod=Main, level=2, throw=false, verbosity=1,)
@@ -338,7 +340,7 @@ function test(model_proxies, data...; mod=Main, level=2, throw=false, verbosity=
                 verbosity > 1 && println(" Repeatable.")
             else
                 verbosity > 1 && println(" Not repeatable.")
-            end 
+            end
         end
 
         length(resources) > 1 && verbosity > 0 &&