.response} is created,
@@ -70,6 +72,11 @@ Training time, in seconds.
Errors logged during prediction.
\item \code{predict_time} :: \code{NULL} | \code{numeric(1)}
Prediction time, in seconds.
+\item \code{predict_method} :: \code{character(1)}\cr
+\code{"full"} when prediction uses a learner fitted on all training data, \code{"cv_ensemble"} when predictions are averaged over
+models trained on resampling folds.
+\item \code{cv_model_states} :: \code{NULL} | \code{list}\cr
+Present for \code{predict_method = "cv_ensemble"}. Contains the states of the learners trained on each resampling fold.
}
This state is given the class \code{"pipeop_learner_cv_state"}.
@@ -87,6 +94,20 @@ predictions with the model trained on all training data.
Number of cross validation folds. Initialized to 3. Only used for \code{resampling.method = "cv"}.
\item \code{keep_response} :: \code{logical(1)}\cr
Only effective during \code{"prob"} prediction: Whether to keep response values, if available. Initialized to \code{FALSE}.
+\item \code{resampling.predict_method} :: \code{character(1)}\cr
+Controls how predictions are produced after training. \code{"full"} (default) fits the wrapped learner on the entire training data.
+\code{"cv_ensemble"} reuses the models fitted during resampling and averages their predictions. This option currently supports
+classification and regression learners together with \code{resampling.method = "cv"}.
+\item \code{resampling.se_aggr} :: \code{character(1)}\cr
+Standard error aggregation used when \code{"cv_ensemble"} predictions are produced for regression learners with \code{predict_type}
+containing \code{"se"}. Shares the definitions with \code{\link{PipeOpRegrAvg}}, i.e. \code{"predictive"}, \code{"mean"}, \code{"within"}, \code{"between"}, \code{"none"}.
+Initialized to \code{"predictive"} (within-fold variance plus between-fold disagreement) when constructed with a \code{\link[mlr3:Learner]{Learner}} that has \code{predict_type = "se"};
+otherwise to \code{"none"}.\cr
+Only present for learners that support \code{"se"} predictions.
+\item \code{resampling.se_aggr_rho} :: \code{numeric(1)}\cr
+Equicorrelation parameter for \code{resampling.se_aggr = "mean"}, interpreted as in \code{\link{PipeOpRegrAvg}}. Ignored otherwise.
+Defaults to \code{0} when \code{resampling.se_aggr = "mean"}.\cr
+Only present for learners that support \code{"se"} predictions.
}
}
@@ -112,7 +133,7 @@ Methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
task = tsk("iris")
@@ -131,6 +152,7 @@ graph = gunion(list(
graph$train(task)
graph$pipeops$classif.rpart$learner$predict_type = "prob"
+graph$pipeops$classif.rpart$param_set$values$resampling.predict_method = "cv_ensemble"
graph$train(task)
\dontshow{\}) # examplesIf}
diff --git a/man/mlr_pipeops_learner_pi_cvplus.Rd b/man/mlr_pipeops_learner_pi_cvplus.Rd
index 188718a80..71c29e9b7 100644
--- a/man/mlr_pipeops_learner_pi_cvplus.Rd
+++ b/man/mlr_pipeops_learner_pi_cvplus.Rd
@@ -100,7 +100,7 @@ Methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
task = tsk("mtcars")
diff --git a/man/mlr_pipeops_nearmiss.Rd b/man/mlr_pipeops_nearmiss.Rd
index 8d4898a66..6340acf23 100644
--- a/man/mlr_pipeops_nearmiss.Rd
+++ b/man/mlr_pipeops_nearmiss.Rd
@@ -69,7 +69,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("themis")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("themis")) withAutoprint(\{ # examplesIf}
library("mlr3")
# Create example task
diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd
index 7f0cfc265..a6d3bd8f1 100644
--- a/man/mlr_pipeops_nmf.Rd
+++ b/man/mlr_pipeops_nmf.Rd
@@ -96,7 +96,7 @@ See \code{\link[NMF:nmf]{nmf()}}.
\section{Internals}{
-Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis-coef-methods]{basis()}}, \code{\link[NMF:basis-coef-methods]{coef()}} and
+Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis]{basis()}}, \code{\link[NMF:coef]{coef()}} and
\code{\link[MASS:ginv]{ginv()}}.
}
@@ -111,7 +111,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (mlr3misc::require_namespaces(c("NMF", "MASS"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (mlr3misc::require_namespaces(c("NMF", "MASS"), quietly = TRUE)) withAutoprint(\{ # examplesIf}
\dontshow{
# NMF attaches these packages to search path on load, #929
lapply(c("package:Biobase", "package:BiocGenerics", "package:generics"), detach, character.only = TRUE)
diff --git a/man/mlr_pipeops_ovrsplit.Rd b/man/mlr_pipeops_ovrsplit.Rd
index 062e086f8..3aa506282 100644
--- a/man/mlr_pipeops_ovrsplit.Rd
+++ b/man/mlr_pipeops_ovrsplit.Rd
@@ -81,7 +81,7 @@ Only methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library(mlr3)
task = tsk("iris")
po = po("ovrsplit")
diff --git a/man/mlr_pipeops_ovrunite.Rd b/man/mlr_pipeops_ovrunite.Rd
index 3ce033902..ba513e4ee 100644
--- a/man/mlr_pipeops_ovrunite.Rd
+++ b/man/mlr_pipeops_ovrunite.Rd
@@ -74,7 +74,7 @@ Only methods inherited from \code{\link{PipeOpEnsemble}}/\code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library(mlr3)
task = tsk("iris")
gr = po("ovrsplit") \%>>\% lrn("classif.rpart") \%>>\% po("ovrunite")
diff --git a/man/mlr_pipeops_proxy.Rd b/man/mlr_pipeops_proxy.Rd
index d1b92cf65..b472ba83d 100644
--- a/man/mlr_pipeops_proxy.Rd
+++ b/man/mlr_pipeops_proxy.Rd
@@ -74,7 +74,7 @@ Only methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
set.seed(1234)
diff --git a/man/mlr_pipeops_randomresponse.Rd b/man/mlr_pipeops_randomresponse.Rd
index e3331d268..570d9e20f 100644
--- a/man/mlr_pipeops_randomresponse.Rd
+++ b/man/mlr_pipeops_randomresponse.Rd
@@ -79,7 +79,7 @@ Only methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library(mlr3)
library(mlr3learners)
diff --git a/man/mlr_pipeops_regravg.Rd b/man/mlr_pipeops_regravg.Rd
index 76fac8eb0..6e8542ddf 100644
--- a/man/mlr_pipeops_regravg.Rd
+++ b/man/mlr_pipeops_regravg.Rd
@@ -12,12 +12,80 @@ Perform (weighted) prediction averaging from regression \code{\link[mlr3:Predict
\code{\link{PipeOpRegrAvg}} to multiple \code{\link{PipeOpLearner}} outputs.
The resulting \code{"response"} prediction is a weighted average of the incoming \code{"response"} predictions.
-\code{"se"} prediction is currently not aggregated but discarded if present.
+Aggregation of \code{"se"} predictions is controlled by the \code{se_aggr} parameter (see below). When \code{"se"} is not requested
+or \code{se_aggr = "none"}, \code{"se"} is dropped.
+}
+\section{"se" Aggregation}{
+
+
+Let there be \code{K} incoming predictions with weights \code{w} (sum to 1). For a given row \code{j}, denote
+per-model means \code{mu_i[j]} and, if available, per-model standard errors \code{se_i[j]}.
+Define
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{mu_bar[j] = sum_i w[i] * mu_i[j]
+var_between[j] = sum_i w[i] * (mu_i[j] - mu_bar[j])^2 # weighted var of means
+var_within[j] = sum_i w[i] * se_i[j]^2 # weighted mean of SE^2s
+}\if{html}{\out{</div>}}
+
+The following aggregation methods are available:
+\itemize{
+\item \strong{\code{se_aggr = "predictive"}} -- \emph{Within + Between (mixture/predictive SD)}
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{se[j] = sqrt(var_within[j] + var_between[j])
+}\if{html}{\out{</div>}}
+
+\strong{Interpretation.} Treats each incoming \code{se_i} as that model's predictive SD at the point (or, if the learner
+reports SE of the conditional mean--as many \code{mlr3} regression learners do--then as that mean-SE). The returned \code{se}
+is the SD of the \emph{mixture ensemble} under weighted averaging: it increases when base models disagree (epistemic spread)
+and when individual models are uncertain (aleatoric spread).
+\strong{Notes.} If \code{se_i} represents \emph{mean} SE (common in \code{predict.lm(se.fit=TRUE)}-style learners), the result
+aggregates those mean-SEs and still adds model disagreement correctly, but it will \emph{underestimate} a true predictive SD
+that would additionally include irreducible noise. Requires \code{"se"} to be present from \strong{all} inputs.
+\item \strong{\code{se_aggr = "mean"}} -- \emph{SE of the weighted average of means under equicorrelation}
+With a correlation parameter \code{se_aggr_rho = rho}, assume
+\code{Cov(mu_i_hat, mu_j_hat) = rho * se_i * se_j} for all \code{i != j}. Then
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{# components:
+a[j] = sum_i (w[i]^2 * se_i[j]^2)
+b[j] = (sum_i w[i] * se_i[j])^2
+var_mean[j] = (1 - rho) * a[j] + rho * b[j]
+se[j] = sqrt(var_mean[j])
+}\if{html}{\out{</div>}}
+
+\strong{Interpretation.} Returns the \emph{standard error of the averaged estimator} \verb{sum_i w[i] * mu_i}, not a predictive SD.
+Use when you specifically care about uncertainty of the averaged mean itself.
+\strong{Notes.} \code{rho} is clamped to the PSD range \verb{[-1/(K-1), 1]} for \code{K > 1}. Typical settings:
+\code{rho = 0} (assume independence; often optimistic for CV/bagging) and \code{rho = 1} (perfect correlation; conservative and
+equal to the weighted arithmetic mean of SEs). Requires \code{"se"} from \strong{all} inputs.
+\item \strong{\code{se_aggr = "within"}} -- \emph{Within-model component only}
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{se[j] = sqrt(var_within[j])
+}\if{html}{\out{</div>}}
+
+\strong{Interpretation.} Aggregates only the average per-model uncertainty and \strong{ignores} disagreement between models.
+Useful as a diagnostic of the aleatoric component; not a full ensemble uncertainty.
+\strong{Notes.} Typically \emph{underestimates} the uncertainty of the ensemble prediction when models disagree.
+Requires \code{"se"} from \strong{all} inputs.
+\item \strong{\code{se_aggr = "between"}} -- \emph{Between-model component only (works without \code{"se"})}
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{se[j] = sqrt(var_between[j])
+}\if{html}{\out{</div>}}
+
+\strong{Interpretation.} Captures only the spread of the base means (epistemic/model disagreement).
+\strong{Notes.} This is the only method that does not use incoming \code{"se"}. It is a \emph{lower bound} on a full predictive SD,
+because it omits within-model noise.
+\item \strong{\code{se_aggr = "none"}} -- \emph{Do not return \code{"se"}}
+\code{"se"} is dropped from the output prediction.
+}
+
+\strong{Relationships and edge cases.} For any row, \code{se("predictive") >= max(se("within"), se("between"))}.
+With a single input (\code{K = 1}), \code{"predictive"} and \code{"within"} return the input \code{"se"}, \code{"between"} returns \code{0}.
+Methods \code{"predictive"}, \code{"mean"}, and \code{"within"} require all inputs to provide \code{"se"}; otherwise aggregation errors.
Weights can be set as a parameter; if none are provided, defaults to
equal weights for each prediction.
-Defaults to equal weights for each model.
}
+
\section{Construction}{
@@ -51,7 +119,15 @@ The \verb{$state} is left empty (\code{list()}).
\section{Parameters}{
-The parameters are the parameters inherited from the \code{\link{PipeOpEnsemble}}.
+The parameters are the parameters inherited from the \code{\link{PipeOpEnsemble}}, as well as:
+\itemize{
+\item \code{se_aggr} :: \code{character(1)}\cr
+Controls how incoming \code{"se"} values are aggregated into an ensemble \code{"se"}. One of
+\code{"predictive"}, \code{"mean"}, \code{"within"}, \code{"between"}, \code{"none"}. See the description above for definitions and interpretation.
+\item \code{se_aggr_rho} :: \code{numeric(1)}\cr
+Equicorrelation parameter used only for \code{se_aggr = "mean"}. Interpreted as the common correlation between
+per-model mean estimators. Recommended range \verb{[0, 1]}; values are clamped to \verb{[-1/(K-1), 1]} for validity.
+}
}
\section{Internals}{
@@ -70,18 +146,18 @@ Only methods inherited from \code{\link{PipeOpEnsemble}}/\code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
-# Simple Bagging
+# Simple Bagging for Regression
gr = ppl("greplicate",
po("subsample") \%>>\%
- po("learner", lrn("classif.rpart")),
+ po("learner", lrn("regr.rpart")),
n = 5
) \%>>\%
- po("classifavg")
+ po("regravg")
-resample(tsk("iris"), GraphLearner$new(gr), rsmp("holdout"))
+resample(tsk("mtcars"), GraphLearner$new(gr), rsmp("holdout"))
\dontshow{\}) # examplesIf}
}
\seealso{
diff --git a/man/mlr_pipeops_smote.Rd b/man/mlr_pipeops_smote.Rd
index 0bfd310db..412e0bed5 100644
--- a/man/mlr_pipeops_smote.Rd
+++ b/man/mlr_pipeops_smote.Rd
@@ -69,7 +69,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("smotefamily")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("smotefamily")) withAutoprint(\{ # examplesIf}
library("mlr3")
# Create example task
diff --git a/man/mlr_pipeops_smotenc.Rd b/man/mlr_pipeops_smotenc.Rd
index 5a231bb2a..b9dfa69a9 100644
--- a/man/mlr_pipeops_smotenc.Rd
+++ b/man/mlr_pipeops_smotenc.Rd
@@ -78,7 +78,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("themis")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("themis")) withAutoprint(\{ # examplesIf}
library("mlr3")
# Create example task
diff --git a/man/mlr_pipeops_targetmutate.Rd b/man/mlr_pipeops_targetmutate.Rd
index b348d1ce3..eb1d10b11 100644
--- a/man/mlr_pipeops_targetmutate.Rd
+++ b/man/mlr_pipeops_targetmutate.Rd
@@ -78,7 +78,7 @@ Only methods inherited from \code{\link{PipeOpTargetTrafo}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library(mlr3)
task = tsk("boston_housing")
po = PipeOpTargetMutate$new("logtrafo", param_vals = list(
diff --git a/man/mlr_pipeops_targettrafoscalerange.Rd b/man/mlr_pipeops_targettrafoscalerange.Rd
index 8400551c5..9f4b95778 100644
--- a/man/mlr_pipeops_targettrafoscalerange.Rd
+++ b/man/mlr_pipeops_targettrafoscalerange.Rd
@@ -66,7 +66,7 @@ Only methods inherited from \code{\link{PipeOpTargetTrafo}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library(mlr3)
task = tsk("boston_housing")
po = PipeOpTargetTrafoScaleRange$new()
diff --git a/man/mlr_pipeops_textvectorizer.Rd b/man/mlr_pipeops_textvectorizer.Rd
index d40503694..726573712 100644
--- a/man/mlr_pipeops_textvectorizer.Rd
+++ b/man/mlr_pipeops_textvectorizer.Rd
@@ -167,7 +167,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (mlr3misc::require_namespaces(c("stopwords", "quanteda"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (mlr3misc::require_namespaces(c("stopwords", "quanteda"), quietly = TRUE)) withAutoprint(\{ # examplesIf}
library("mlr3")
library("data.table")
# create some text data
diff --git a/man/mlr_pipeops_threshold.Rd b/man/mlr_pipeops_threshold.Rd
index d8aa2fa5c..65ef1fd9e 100644
--- a/man/mlr_pipeops_threshold.Rd
+++ b/man/mlr_pipeops_threshold.Rd
@@ -67,7 +67,7 @@ Only methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
t = tsk("german_credit")
gr = po(lrn("classif.rpart", predict_type = "prob")) \%>>\%
diff --git a/man/mlr_pipeops_tomek.Rd b/man/mlr_pipeops_tomek.Rd
index 7a3bee4bd..7fa699d8c 100644
--- a/man/mlr_pipeops_tomek.Rd
+++ b/man/mlr_pipeops_tomek.Rd
@@ -61,7 +61,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("themis")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("themis")) withAutoprint(\{ # examplesIf}
library("mlr3")
# Create example task
diff --git a/man/mlr_pipeops_tunethreshold.Rd b/man/mlr_pipeops_tunethreshold.Rd
index f2707ef05..3ffd5e43f 100644
--- a/man/mlr_pipeops_tunethreshold.Rd
+++ b/man/mlr_pipeops_tunethreshold.Rd
@@ -89,7 +89,7 @@ Only methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (mlr3misc::require_namespaces(c("bbotk", "rpart", "GenSA"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (mlr3misc::require_namespaces(c("bbotk", "rpart", "GenSA"), quietly = TRUE)) withAutoprint(\{ # examplesIf}
library("mlr3")
task = tsk("iris")
diff --git a/man/mlr_pipeops_updatetarget.Rd b/man/mlr_pipeops_updatetarget.Rd
index 263b41ff3..434e3ec96 100644
--- a/man/mlr_pipeops_updatetarget.Rd
+++ b/man/mlr_pipeops_updatetarget.Rd
@@ -75,7 +75,7 @@ Only methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
\dontrun{
# Create a binary class task from iris
library(mlr3)
diff --git a/man/mlr_pipeops_vtreat.Rd b/man/mlr_pipeops_vtreat.Rd
index 81514f09d..110fe7084 100644
--- a/man/mlr_pipeops_vtreat.Rd
+++ b/man/mlr_pipeops_vtreat.Rd
@@ -128,7 +128,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("vtreat")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("vtreat")) withAutoprint(\{ # examplesIf}
library("mlr3")
set.seed(2020)
diff --git a/man/mlr_pipeops_yeojohnson.Rd b/man/mlr_pipeops_yeojohnson.Rd
index f1823e343..1293846c7 100644
--- a/man/mlr_pipeops_yeojohnson.Rd
+++ b/man/mlr_pipeops_yeojohnson.Rd
@@ -73,7 +73,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("bestNormalize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("bestNormalize")) withAutoprint(\{ # examplesIf}
library("mlr3")
task = tsk("iris")
diff --git a/man/po.Rd b/man/po.Rd
index cdb941a25..8b1b13562 100644
--- a/man/po.Rd
+++ b/man/po.Rd
@@ -48,7 +48,7 @@ it to a \code{\link{PipeOp}}. \code{pos()} (with plural-s) takes either a \code{
list of objects, and creates a \code{list} of \code{\link{PipeOp}}s.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
po("learner", lrn("classif.rpart"), cp = 0.3)
diff --git a/man/ppl.Rd b/man/ppl.Rd
index 190eef7d1..77ec8d1c4 100644
--- a/man/ppl.Rd
+++ b/man/ppl.Rd
@@ -32,7 +32,7 @@ Creates a \code{\link{Graph}} from \code{\link{mlr_graphs}} from given ID
vector of any list and returns a \code{list} of possibly muliple \code{\link{Graph}}s.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
gr = ppl("bagging", graph = po(lrn("regr.rpart")),
diff --git a/man/preproc.Rd b/man/preproc.Rd
index 2b6340637..7fa3b3e43 100644
--- a/man/preproc.Rd
+++ b/man/preproc.Rd
@@ -56,7 +56,7 @@ of \code{\link[mlr3:TaskSupervised]{TaskSupervised}} will not work with these in
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
task = tsk("iris")
diff --git a/tests/testthat/test_pipeop_classifavg.R b/tests/testthat/test_pipeop_classifavg.R
new file mode 100644
index 000000000..feb3a07af
--- /dev/null
+++ b/tests/testthat/test_pipeop_classifavg.R
@@ -0,0 +1,127 @@
+context("PipeOpClassifAvg")
+
+predict_classifavg = function(predictions, prob_aggr = "mean", weights = 1, prob_aggr_eps = 1e-12) {
+ po = po("classifavg")
+ po$param_set$values$weights = weights
+ po$param_set$values$prob_aggr = prob_aggr
+ if (identical(prob_aggr, "log")) {
+ po$param_set$values$prob_aggr_eps = prob_aggr_eps
+ }
+ train_nulls = replicate(length(predictions), NULL, simplify = FALSE)
+ po$train(train_nulls)
+ po$predict(predictions)[[1]]
+}
+
+test_that("PipeOpClassifAvg probability aggregation methods return expected probabilities", {
+ row_ids = 1:3
+ lvls = c("c0", "c1", "c2")
+ truth = factor(c("c0", "c1", "c2"), levels = lvls)
+
+ prob_mats = list(
+ matrix(c(
+ 0.7, 0.2, 0.1,
+ 0.3, 0.4, 0.3,
+ 0.2, 0.3, 0.5
+ ), ncol = length(lvls), byrow = TRUE, dimnames = list(NULL, lvls)),
+ matrix(c(
+ 0.6, 0.1, 0.3,
+ 0.4, 0.3, 0.3,
+ 0.25, 0.25, 0.5
+ ), ncol = length(lvls), byrow = TRUE, dimnames = list(NULL, lvls)),
+ matrix(c(
+ 0.5, 0.3, 0.2,
+ 0.2, 0.5, 0.3,
+ 0.3, 0.2, 0.5
+ ), ncol = length(lvls), byrow = TRUE, dimnames = list(NULL, lvls))
+ )
+
+ predictions = lapply(prob_mats, function(prob) {
+ PredictionClassif$new(row_ids = row_ids, truth = truth, prob = prob)
+ })
+ weights = c(0.2, 0.3, 0.5)
+
+ pred_mean = predict_classifavg(predictions, prob_aggr = "mean", weights = weights)
+ expected_mean = Reduce(`+`, Map(function(prob, w) prob * w, prob_mats, weights))
+ expect_equal(pred_mean$prob, expected_mean, tolerance = 1e-8)
+ expected_response_mean = factor(colnames(expected_mean)[max.col(expected_mean, ties.method = "first")], levels = lvls)
+ expect_equal(pred_mean$response, expected_response_mean)
+
+ pred_log = predict_classifavg(predictions, prob_aggr = "log", weights = weights)
+ expected_log = mlr3pipelines:::weighted_matrix_logpool(prob_mats, weights, epsilon = 1e-12)
+ expect_equal(pred_log$prob, expected_log, tolerance = 1e-8)
+ expected_response_log = factor(colnames(expected_log)[max.col(expected_log, ties.method = "first")], levels = lvls)
+ expect_equal(pred_log$response, expected_response_log)
+})
+
+test_that("PipeOpClassifAvg single prediction returns input probabilities for mean and log", {
+ row_ids = 1:4
+ lvls = c("yes", "no")
+ truth = factor(c("yes", "no", "yes", "no"), levels = lvls)
+ single_prob = matrix(c(
+ 0.8, 0.2,
+ 0.1, 0.9,
+ 0.6, 0.4,
+ 0.3, 0.7
+ ), ncol = length(lvls), byrow = TRUE, dimnames = list(NULL, lvls))
+
+ prediction = list(PredictionClassif$new(row_ids = row_ids, truth = truth, prob = single_prob))
+
+ result_mean = predict_classifavg(prediction, prob_aggr = "mean", weights = 1)
+ expect_equal(result_mean$prob, single_prob, tolerance = 1e-10)
+
+ result_log = predict_classifavg(prediction, prob_aggr = "log", weights = 1)
+ expect_equal(result_log$prob, single_prob, tolerance = 1e-10)
+})
+
+test_that("PipeOpClassifAvg aggregates factor responses when probabilities are missing", {
+ row_ids = 1:5
+ lvls = c("a", "b")
+ truth = factor(rep("a", length(row_ids)), levels = lvls)
+ responses = list(
+ factor(c("a", "a", "b", "a", "b"), levels = lvls),
+ factor(c("b", "a", "b", "b", "b"), levels = lvls),
+ factor(c("a", "b", "a", "a", "b"), levels = lvls)
+ )
+ predictions = lapply(responses, function(resp) {
+ PredictionClassif$new(row_ids = row_ids, truth = truth, response = resp)
+ })
+ weights = c(0.5, 0.3, 0.2)
+
+ result = predict_classifavg(predictions, prob_aggr = "log", weights = weights)
+ expected_freq = mlr3pipelines:::weighted_factor_mean(responses, weights, lvls)
+ expect_equal(result$prob, expected_freq)
+ expected_response = factor(lvls[max.col(expected_freq, ties.method = "first")], levels = lvls)
+ expect_equal(result$response, expected_response)
+})
+
+test_that("PipeOpClassifAvg log aggregation handles zeros with epsilon", {
+ row_ids = 1
+ lvls = c("a", "b")
+ truth = factor("a", levels = lvls)
+ prob_list = list(
+ matrix(c(0, 1), ncol = length(lvls), dimnames = list(NULL, lvls)),
+ matrix(c(0.5, 0.5), ncol = length(lvls), dimnames = list(NULL, lvls)),
+ matrix(c(0.5, 0.5), ncol = length(lvls), dimnames = list(NULL, lvls))
+ )
+ predictions = lapply(prob_list, function(prob) {
+ PredictionClassif$new(row_ids = row_ids, truth = truth, prob = prob)
+ })
+
+ po = po("classifavg")
+ po$param_set$values$weights = rep(1 / length(predictions), length(predictions))
+ po$param_set$values$prob_aggr = "log"
+ po$param_set$values$prob_aggr_eps = 1e-12
+
+ po$train(replicate(length(predictions), NULL, simplify = FALSE))
+ result_eps = po$predict(predictions)[[1]]
+ expect_true(all(is.finite(result_eps$prob)))
+ expected_eps = mlr3pipelines:::weighted_matrix_logpool(prob_list, po$param_set$values$weights, epsilon = 1e-12)
+ expect_equal(result_eps$prob, expected_eps, tolerance = 1e-10)
+
+ po$param_set$values$prob_aggr_eps = 0
+ po$train(replicate(length(predictions), NULL, simplify = FALSE))
+ result_zero = po$predict(predictions)[[1]]
+ expected_zero = mlr3pipelines:::weighted_matrix_logpool(prob_list, po$param_set$values$weights, epsilon = 0)
+ expect_equal(result_zero$prob, expected_zero)
+ expect_equal(as.numeric(result_zero$prob[1, ]), c(0, 1))
+})
diff --git a/tests/testthat/test_pipeop_learnercv.R b/tests/testthat/test_pipeop_learnercv.R
index 9d6069f65..a91c5731e 100644
--- a/tests/testthat/test_pipeop_learnercv.R
+++ b/tests/testthat/test_pipeop_learnercv.R
@@ -42,12 +42,410 @@ test_that("PipeOpLearnerCV - param values", {
skip_if_not_installed("rpart")
lrn = mlr_learners$get("classif.rpart")
polrn = PipeOpLearnerCV$new(lrn)
- expect_subset(c("minsplit", "resampling.method", "resampling.folds"), polrn$param_set$ids())
- expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 3, resampling.keep_response = FALSE, xval = 0))
+ expect_true(all(c(
+ "minsplit",
+ "resampling.method",
+ "resampling.folds",
+ "resampling.predict_method",
+ "resampling.prob_aggr",
+ "resampling.prob_aggr_eps"
+ ) %in% polrn$param_set$ids()))
+ expect_false(any(c("resampling.se_aggr", "resampling.se_aggr_rho") %in% polrn$param_set$ids()))
+ expect_equal(polrn$param_set$values$resampling.method, "cv")
+ expect_equal(polrn$param_set$values$resampling.folds, 3)
+ expect_false(polrn$param_set$values$resampling.keep_response)
+ expect_equal(polrn$param_set$values$resampling.predict_method, "full")
+ expect_equal(polrn$param_set$values$resampling.prob_aggr, "mean")
+ expect_null(polrn$param_set$values$resampling.prob_aggr_eps)
+ expect_equal(polrn$param_set$values$xval, 0)
polrn$param_set$values$minsplit = 2
- expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 3, resampling.keep_response = FALSE, minsplit = 2, xval = 0))
+ expect_equal(polrn$param_set$values$minsplit, 2)
+ expect_equal(polrn$param_set$values$resampling.prob_aggr, "mean")
+ expect_null(polrn$param_set$values$resampling.prob_aggr_eps)
polrn$param_set$values$resampling.folds = 4
- expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 4, resampling.keep_response = FALSE, minsplit = 2, xval = 0))
+ expect_equal(polrn$param_set$values$resampling.folds, 4)
+ expect_equal(polrn$param_set$values$minsplit, 2)
+})
+
+test_that("PipeOpLearnerCV se aggregation default matches learner predict_type", {  # se_aggr params exist/default depending on the wrapped learner's SE support
+  learner_resp = LearnerRegrDebug$new()
+  learner_resp$predict_type = "response"
+  po_resp = PipeOpLearnerCV$new(learner_resp)
+  expect_true("resampling.se_aggr" %in% po_resp$param_set$ids())
+  expect_identical(po_resp$param_set$values$resampling.se_aggr, "none")  # SE-capable learner currently predicting "response" -> default "none"
+
+  learner_se = LearnerRegrDebug$new()
+  learner_se$predict_type = "se"
+  po_se = PipeOpLearnerCV$new(learner_se)
+  expect_true(all(c("resampling.se_aggr", "resampling.se_aggr_rho") %in% po_se$param_set$ids()))
+  expect_identical(po_se$param_set$values$resampling.se_aggr, "predictive")  # documented default when constructed with predict_type = "se"
+
+  learner_no_se = lrn("regr.rpart")  # rpart cannot predict SE, so the params must be absent entirely
+  po_no_se = PipeOpLearnerCV$new(learner_no_se)
+  expect_false(any(c("resampling.se_aggr", "resampling.se_aggr_rho") %in% po_no_se$param_set$ids()))
+})
+
+test_that("PipeOpLearnerCV - cv ensemble averages fold learners", {  # predict output must equal the plain mean of per-fold model predictions
+  skip_if_not_installed("rpart")
+  task = tsk("iris")
+  learner = lrn("classif.rpart", predict_type = "prob")
+  po = PipeOpLearnerCV$new(learner,
+    param_vals = list(
+      resampling.folds = 2,
+      resampling.keep_response = TRUE,
+      resampling.predict_method = "cv_ensemble"
+    )
+  )
+
+  trained_task = po$train(list(task))[[1]]
+  expect_setequal(trained_task$feature_names, c(
+    sprintf("%s.response", po$id),
+    paste0(po$id, ".prob.", task$class_names)
+  ))
+  expect_equal(po$state$predict_method, "cv_ensemble")
+  expect_length(po$state$cv_model_states, 2)  # one stored model state per fold
+
+  result_task = po$predict(list(task))[[1]]
+  prob_feature_names = paste0(po$id, ".prob.", task$class_names)
+
+  pred_probs = as.matrix(result_task$data(rows = task$row_ids, cols = prob_feature_names))
+  manual_probs = mlr3misc::map(po$state$cv_model_states, function(state) {  # re-predict with each stored fold state and average by hand
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    dt = as.data.table(clone$predict(task))
+    data.table::setorder(dt, row_ids)
+    as.matrix(dt[, paste0("prob.", task$class_names), with = FALSE])
+  })
+  manual_prob = Reduce(`+`, manual_probs) / length(manual_probs)
+  colnames(manual_prob) = prob_feature_names
+  expect_equal(pred_probs, manual_prob)
+
+  result_response = result_task$data(rows = task$row_ids, cols = sprintf("%s.response", po$id))[[1]]
+  expect_equal(
+    as.character(result_response),
+    task$class_names[max.col(manual_prob)]  # response = argmax of the averaged probabilities
+  )
+})
+
+test_that("PipeOpLearnerCV - cv ensemble drops response when requested", {  # keep_response defaults to FALSE, so only prob columns survive
+  skip_if_not_installed("rpart")
+  task = tsk("iris")
+  learner = lrn("classif.rpart", predict_type = "prob")
+  po = PipeOpLearnerCV$new(learner,
+    param_vals = list(
+      resampling.predict_method = "cv_ensemble"
+    )
+  )
+  po$train(list(task))
+  result_task = po$predict(list(task))[[1]]
+  expect_true(all(sprintf("%s.prob.%s", po$id, task$class_names) %in% result_task$feature_names))
+  expect_false(any(sprintf("%s.response", po$id) %in% result_task$feature_names))
+})
+
+test_that("PipeOpLearnerCV - cv ensemble averages classif responses", {  # with predict_type "response", folds vote; response = majority class
+  skip_if_not_installed("rpart")
+  task = tsk("iris")
+  learner = lrn("classif.rpart", predict_type = "response")
+  po = PipeOpLearnerCV$new(learner,
+    param_vals = list(resampling.predict_method = "cv_ensemble")
+  )
+  po$train(list(task))
+  expect_equal(po$state$predict_method, "cv_ensemble")
+  expect_true(length(po$state$cv_model_states) > 1)
+
+  result_task = po$predict(list(task))[[1]]
+  response_feature = sprintf("%s.response", po$id)
+  expect_setequal(result_task$feature_names, response_feature)
+
+  manual_responses = mlr3misc::map(po$state$cv_model_states, function(state) {  # per-fold hard predictions, row-order normalized
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    pred_dt = as.data.table(clone$predict(task))
+    data.table::setorderv(pred_dt, "row_ids")
+    as.character(pred_dt$response)
+  })
+
+  manual_matrix = as.matrix(do.call(cbind, manual_responses))
+  n = nrow(manual_matrix)
+  prob_matrix = vapply(task$class_names, function(cls) rowMeans(manual_matrix == cls), numeric(n))  # vote frequencies per class
+  if (!is.matrix(prob_matrix)) {  # vapply drops to a vector when n == 1
+    prob_matrix = matrix(prob_matrix, ncol = length(task$class_names))
+  }
+  colnames(prob_matrix) = task$class_names
+  manual_response = task$class_names[max.col(prob_matrix, ties.method = "first")]
+  manual_response = factor(manual_response, levels = task$class_names)
+
+  observed_response = result_task$data(rows = task$row_ids, cols = response_feature)[[1]]
+  expect_equal(as.character(observed_response), as.character(manual_response))
+
+  learner_prediction = po$learner_model$predict(task)  # the exported GraphLearner must agree with the PipeOp output
+  expect_equal(as.character(learner_prediction$response), as.character(manual_response))
+  pred_dt = as.data.table(learner_prediction)
+  data.table::setorderv(pred_dt, "row_ids")
+  graph_prob = as.matrix(pred_dt[, paste0("prob.", task$class_names), with = FALSE])
+  colnames(graph_prob) = task$class_names
+  expect_equal(graph_prob, prob_matrix)
+})
+
+test_that("PipeOpLearnerCV - cv ensemble log prob aggregation", {  # prob_aggr = "log" must match weighted_matrix_logpool on per-fold probs
+  skip_if_not_installed("rpart")
+  task = tsk("iris")
+  learner = lrn("classif.rpart", predict_type = "prob")
+  param_vals = list(
+    resampling.folds = 3,
+    resampling.keep_response = TRUE,
+    resampling.predict_method = "cv_ensemble",
+    resampling.prob_aggr = "log",
+    resampling.prob_aggr_eps = 1e-8
+  )
+  po = PipeOpLearnerCV$new(learner, param_vals = param_vals)
+
+  trained_task = po$train(list(task))[[1]]
+  prob_cols = paste0(po$id, ".prob.", task$class_names)
+  expect_true(all(prob_cols %in% trained_task$feature_names))
+
+  result_task = po$predict(list(task))[[1]]
+  result_probs = as.matrix(result_task$data(rows = task$row_ids, cols = prob_cols))
+  manual_probs = mlr3misc::map(po$state$cv_model_states, function(state) {
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    dt = as.data.table(clone$predict(task))
+    data.table::setorder(dt, row_ids)
+    as.matrix(dt[, paste0("prob.", task$class_names), with = FALSE])
+  })
+  weights = rep(1 / length(manual_probs), length(manual_probs))  # cv_ensemble weights folds equally
+  expected_probs = mlr3pipelines:::weighted_matrix_logpool(manual_probs, weights, epsilon = param_vals$resampling.prob_aggr_eps)
+  colnames(expected_probs) = prob_cols
+  expect_equal(result_probs, expected_probs, tolerance = 1e-8)
+
+  response_col = sprintf("%s.response", po$id)
+  observed_response = result_task$data(rows = task$row_ids, cols = response_col)[[1]]
+  expected_response = factor(task$class_names[max.col(expected_probs, ties.method = "first")], levels = task$class_names)
+  expect_equal(as.character(observed_response), as.character(expected_response))
+
+  graph_prediction = po$learner_model$predict(task)  # GraphLearner view must reproduce the same pooled probabilities
+  graph_dt = as.data.table(graph_prediction)
+  data.table::setorder(graph_dt, row_ids)
+  graph_probs = as.matrix(graph_dt[, paste0("prob.", task$class_names), with = FALSE])
+  colnames(graph_probs) = prob_cols
+  expect_equal(graph_probs, expected_probs, tolerance = 1e-8)
+  expect_equal(as.character(graph_dt$response), as.character(expected_response))
+})
+
+test_that("PipeOpLearnerCV - log aggregation with zeros uses epsilon", {  # featureless on a 2-row task yields degenerate fold probs; pooling must not NaN
+  backend = data.table::data.table(
+    x = 1:2,
+    y = factor(c("a", "b"), levels = c("a", "b"))
+  )
+  task = TaskClassif$new("two_point", backend = backend, target = "y")
+  learner = lrn("classif.featureless", predict_type = "prob")
+  po = PipeOpLearnerCV$new(learner, param_vals = list(
+    resampling.method = "cv",
+    resampling.folds = 2,
+    resampling.predict_method = "cv_ensemble",
+    resampling.prob_aggr = "log",
+    resampling.prob_aggr_eps = 1e-8
+  ))
+
+  po$train(list(task))
+  result_task = po$predict(list(task))[[1]]
+  prob_cols = paste0(po$id, ".prob.", task$class_names)
+  probs = as.matrix(result_task$data(rows = task$row_ids, cols = prob_cols))
+  expect_false(any(is.nan(probs)))
+  expect_equal(
+    unname(probs),
+    matrix(rep(0.5, length(task$row_ids) * length(task$class_names)),  # opposing one-hot folds pool back to uniform 0.5/0.5
+      ncol = length(task$class_names), byrow = TRUE
+    )
+  )
+})
+
+test_that("PipeOpLearnerCV - log aggregation epsilon controls shrinkage", {  # result must track weighted_matrix_logpool for both eps > 0 and eps = 0
+  backend = data.table::data.table(
+    x = 1:3,
+    y = factor(c("a", "b", "b"), levels = c("a", "b"))
+  )
+  task = TaskClassif$new("three_point", backend = backend, target = "y")
+  learner = lrn("classif.featureless", predict_type = "prob")
+
+  po = PipeOpLearnerCV$new(learner, param_vals = list(
+    resampling.method = "cv",
+    resampling.folds = 3,
+    resampling.predict_method = "cv_ensemble",
+    resampling.prob_aggr = "log",
+    resampling.prob_aggr_eps = 1e-12
+  ))
+  po$train(list(task))
+  result_task = po$predict(list(task))[[1]]
+
+  manual_probs = mlr3misc::map(po$state$cv_model_states, function(state) {
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    dt = as.data.table(clone$predict(task))
+    data.table::setorder(dt, row_ids)
+    as.matrix(dt[, paste0("prob.", task$class_names), with = FALSE])
+  })
+  weights = rep(1 / length(manual_probs), length(manual_probs))
+  expected_eps = mlr3pipelines:::weighted_matrix_logpool(
+    manual_probs, weights, epsilon = po$param_set$values$resampling.prob_aggr_eps
+  )
+  prob_cols = paste0(po$id, ".prob.", task$class_names)
+  observed_eps = as.matrix(result_task$data(rows = task$row_ids, cols = prob_cols))
+  expect_false(any(is.nan(observed_eps)))
+  expect_equal(unname(observed_eps), unname(expected_eps), tolerance = 1e-10)
+
+  po$param_set$values$resampling.prob_aggr_eps = 0  # retrain with smoothing off: zero probs must now dominate the pool
+  po$train(list(task))
+  result_zero = po$predict(list(task))[[1]]
+  manual_probs_zero = mlr3misc::map(po$state$cv_model_states, function(state) {
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    dt = as.data.table(clone$predict(task))
+    data.table::setorder(dt, row_ids)
+    as.matrix(dt[, paste0("prob.", task$class_names), with = FALSE])
+  })
+  weights_zero = rep(1 / length(manual_probs_zero), length(manual_probs_zero))
+  manual_zero = mlr3pipelines:::weighted_matrix_logpool(manual_probs_zero, weights_zero, epsilon = 0)
+  colnames(manual_zero) = paste0(po$id, ".prob.", task$class_names)
+  observed_zero = as.matrix(result_zero$data(rows = task$row_ids, cols = prob_cols))
+  expect_equal(unname(observed_zero), unname(manual_zero))
+  expect_equal(observed_zero[, prob_cols[2]], rep(1, task$nrow))  # the fold predicting P(a) = 0 forces class "b" everywhere
+  expect_equal(observed_zero[, prob_cols[1]], rep(0, task$nrow))
+})
+
+test_that("PipeOpLearnerCV - cv ensemble averages regression predictions", {  # regression responses are the arithmetic mean over fold models
+  skip_if_not_installed("rpart")
+  task = TaskRegr$new("mtcars", backend = data.table::as.data.table(mtcars), target = "mpg")
+  learner = lrn("regr.rpart")
+  po = PipeOpLearnerCV$new(learner,
+    param_vals = list(resampling.folds = 2, resampling.predict_method = "cv_ensemble")
+  )
+  po$train(list(task))
+  result_task = po$predict(list(task))[[1]]
+  feature_name = sprintf("%s.response", po$id)
+  expect_true(feature_name %in% result_task$feature_names)
+
+  manual_responses = mlr3misc::map(po$state$cv_model_states, function(state) {
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    pred = clone$predict(task)
+    pred$response
+  })
+  manual_average = Reduce(`+`, manual_responses) / length(manual_responses)
+  expect_equal(result_task$data(rows = task$row_ids, cols = feature_name)[[1]], manual_average)
+
+  graph_pred = po$learner_model$predict(task)
+  expect_equal(graph_pred$response, manual_average)
+  expect_true(is.null(graph_pred$se) || all(is.na(graph_pred$se)))  # learner has no SE predict_type, so no SE should be produced
+})
+
+test_that("PipeOpLearnerCV - cv ensemble handles multiplicity", {  # state and outputs must stay Multiplicity-wrapped, one entry per inner task
+  skip_if_not_installed("rpart")
+  tasks = Multiplicity(tsk("iris"), tsk("sonar"))
+  learner = lrn("classif.rpart", predict_type = "prob")
+  po = po("learner_cv", learner,
+    param_vals = list(resampling.predict_method = "cv_ensemble")
+  )
+
+  train_out = po$train(list(tasks))[[1]]
+  expect_class(train_out, "Multiplicity")
+  expect_equal(length(train_out), 2L)
+  expect_true(all(mlr3misc::map_lgl(train_out, inherits, what = "Task")))
+
+  expect_class(po$state, "Multiplicity")
+  expect_true(all(mlr3misc::map_lgl(po$state, function(st) st$predict_method == "cv_ensemble")))
+  expect_true(all(mlr3misc::map_lgl(po$state, function(st) length(st$cv_model_states) == po$param_set$values$resampling.folds)))
+
+  predict_out = po$predict(list(tasks))[[1]]
+  expect_class(predict_out, "Multiplicity")
+  expect_equal(length(predict_out), 2L)
+  expect_true(all(mlr3misc::map_lgl(predict_out, inherits, what = "Task")))
+
+  orig_tasks = as.list(tasks)
+  pred_tasks = as.list(predict_out)
+  expect_true(all(unlist(Map(function(pred_task, orig_task) {  # each inner prediction task only carries its own task's prob columns
+    all(pred_task$feature_names %in% paste0(po$id, ".prob.", orig_task$class_names))
+  }, pred_tasks, orig_tasks))))
+})
+
+test_that("PipeOpLearnerCV - cv ensemble requires resampling method cv", {  # cv_ensemble is only defined for resampling.method = "cv"
+  skip_if_not_installed("rpart")
+  po = PipeOpLearnerCV$new(
+    lrn("classif.rpart"),
+    param_vals = list(resampling.method = "insample", resampling.predict_method = "cv_ensemble")
+  )
+  expect_error(po$train(list(tsk("iris"))), "cv_ensemble")  # error message must mention the offending option
+})
+
+test_that("PipeOpLearnerCV - learner_model returns averaged ensemble", {  # exported learner_model must behave identically to the PipeOp itself
+  skip_if_not_installed("rpart")
+  task = tsk("iris")
+  learner = lrn("classif.rpart", predict_type = "prob")
+  po = PipeOpLearnerCV$new(learner,
+    param_vals = list(resampling.predict_method = "cv_ensemble", resampling.keep_response = TRUE)
+  )
+  po$train(list(task))
+
+  learner_model = po$learner_model
+  expect_class(learner_model, "GraphLearner")
+
+  task_prediction = po$predict(list(task))[[1]]
+  dt_po = task_prediction$data(rows = task$row_ids, cols = task_prediction$feature_names)
+
+  graph_prediction = learner_model$predict(task)
+  expect_class(graph_prediction, "PredictionClassif")
+  dt_graph = as.data.table(graph_prediction)
+  data.table::setorder(dt_graph, row_ids)
+
+  prob_cols = paste0(po$id, ".prob.", task$class_names)
+  graph_prob_cols = paste0("prob.", task$class_names)  # Prediction columns lack the PipeOp id prefix
+  graph_matrix = as.matrix(dt_graph[, graph_prob_cols, with = FALSE])
+  colnames(graph_matrix) = prob_cols
+  expect_equal(as.matrix(dt_po[, prob_cols, with = FALSE]), graph_matrix)
+
+  expect_equal(
+    as.character(dt_po[[sprintf("%s.response", po$id)]]),
+    as.character(dt_graph$response)
+  )
+})
+
+test_that("PipeOpLearnerCV - cv ensemble with predict_type = 'se'", {  # SE columns must follow aggregate_se_weighted with the "predictive" default
+  skip_if_not_installed("mlr3learners")
+  task = tsk("mtcars")
+  learner = lrn("regr.lm", predict_type = "se")
+  po = PipeOpLearnerCV$new(learner,
+    param_vals = list(resampling.predict_method = "cv_ensemble")
+  )
+  po$train(list(task))
+  result_task = po$predict(list(task))[[1]]
+
+  response_col = sprintf("%s.response", po$id)
+  se_col = sprintf("%s.se", po$id)
+  expect_true(all(c(response_col, se_col) %in% result_task$feature_names))
+
+  manual_preds = mlr3misc::map(po$state$cv_model_states, function(state) {
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    clone$predict(task)
+  })
+
+  manual_dt = mlr3misc::map(manual_preds, function(pred) {
+    dt = as.data.table(pred)
+    data.table::setorderv(dt, "row_ids")
+    list(response = dt$response, se = dt$se)
+  })
+  manual_response = Reduce(`+`, mlr3misc::map(manual_dt, "response")) / length(manual_dt)
+  expect_equal(result_task$data(rows = task$row_ids, cols = response_col)[[1]], manual_response)
+
+  weights = rep(1 / length(manual_dt), length(manual_dt))
+  manual_se = mlr3pipelines:::aggregate_se_weighted(  # defaults: method "predictive", rho 0 (rho unused by "predictive")
+    mlr3misc::map(manual_dt, "response"),
+    mlr3misc::map(manual_dt, "se"),
+    weights = weights,
+    method = "predictive",
+    rho = 0
+  )
+  expect_equal(result_task$data(rows = task$row_ids, cols = se_col)[[1]], manual_se)
})
test_that("PipeOpLearnerCV - within resampling", {
@@ -142,6 +540,19 @@ test_that("marshal", {
test_that("marshal multiplicity", {
skip_if_not_installed("rpart")
skip_if_not_installed("bbotk")
+  if (!nzchar(system.file(package = "mlr3pipelines"))) {  # fast base-R installedness check; rownames(installed.packages()) scans every installed package (see ?installed.packages)
+    expect_man_exists <<- function(man) {  # fallback helper when the package (and its real helper) is not installed
+      checkmate::expect_string(man, na.ok = TRUE, fixed = "::")  # man references look like "pkg::topic"
+      if (!is.na(man)) {
+        parts = strsplit(man, "::", fixed = TRUE)[[1L]]
+        if (!nzchar(system.file(package = parts[1L]))) {
+          return(invisible(NULL))  # referenced package absent: nothing to look up, treat as pass
+        }
+        matches = help.search(parts[2L], package = parts[1L], ignore.case = FALSE)
+        checkmate::expect_data_frame(matches$matches, min.rows = 1L, info = "man page lookup")
+      }
+    }
+  }
po = po("learner_cv", learner = lrn("classif.debug"))
po$train(list(Multiplicity(tsk("iris"), tsk("sonar"))))
s = po$state
@@ -193,6 +604,25 @@ test_that("marshal multiplicity", {
})
+test_that("marshal with cv ensemble", {  # cv_model_states must survive a marshal/unmarshal round trip
+  skip_if_not_installed("rpart")
+  task = tsk("iris")
+  po = po("learner_cv", learner = lrn("classif.rpart", predict_type = "prob"),
+    param_vals = list(resampling.predict_method = "cv_ensemble"))
+  po$train(list(task))
+  expect_equal(po$state$predict_method, "cv_ensemble")
+  marshaled = marshal_model(po$state)
+  expect_true(is_marshaled_model(marshaled) || inherits(marshaled, "pipeop_learner_cv_state"))  # rpart models may not need marshaling; both outcomes valid
+  unmarshaled = unmarshal_model(marshaled)
+  expect_equal(names(unmarshaled), names(po$state))
+  expect_equal(length(unmarshaled$cv_model_states), length(po$state$cv_model_states))
+  po$state = unmarshaled
+  expect_equal(  # restored state must predict deterministically
+    po$predict(list(task)),
+    po$predict(list(task))
+  )
+})
+
test_that("state class and multiplicity", {
po = po("learner_cv", learner = lrn("classif.debug"))
po$train(list(Multiplicity(tsk("iris"))))
@@ -206,3 +636,87 @@ test_that("state class and multiplicity", {
expect_class(po1$state[[1L]], "Multiplicity")
expect_class(po1$state[[1L]][[1L]], "pipeop_learner_cv_state")
})
+
+test_that("PipeOpLearnerCV cv ensemble aggregates SE like PipeOpRegrAvg", {  # every se_aggr method must delegate to aggregate_se_weighted
+  task_backend = data.table::data.table(
+    x1 = c(1, 2, 3, 4),
+    x2 = c(4, 3, 2, 1),
+    y = c(2, 4, 5, 7)
+  )
+  task = TaskRegr$new("debug_se_task", backend = task_backend, target = "y")
+  configs = list(  # covers all methods; rho only applies to "mean" (including a negative value)
+    list(se_aggr = "none", rho = NULL),
+    list(se_aggr = "between", rho = NULL),
+    list(se_aggr = "within", rho = NULL),
+    list(se_aggr = "predictive", rho = NULL),
+    list(se_aggr = "mean", rho = 0),
+    list(se_aggr = "mean", rho = 1),
+    list(se_aggr = "mean", rho = -0.5)
+  )
+
+  for (cfg in configs) {
+    learner = LearnerRegrDebug$new()
+    learner$predict_type = "se"
+    param_vals = list(
+      resampling.method = "cv",
+      resampling.folds = 2,
+      resampling.predict_method = "cv_ensemble",
+      resampling.se_aggr = cfg$se_aggr
+    )
+    if (!is.null(cfg$rho)) {
+      param_vals$resampling.se_aggr_rho = cfg$rho
+    }
+    po = PipeOpLearnerCV$new(learner, param_vals = param_vals)
+
+    po$train(list(task))
+    result_task = po$predict(list(task))[[1]]
+    col_response = sprintf("%s.response", po$id)
+    col_se = sprintf("%s.se", po$id)
+
+    expect_true(col_response %in% result_task$feature_names)
+
+    base_preds = mlr3misc::map(po$state$cv_model_states, function(st) {  # reconstruct per-fold predictions from the stored states
+      base = LearnerRegrDebug$new()
+      base$predict_type = "se"
+      base$state = st
+      pred = base$predict(task)
+      pred_dt = as.data.table(pred)
+      data.table::setorder(pred_dt, row_ids)
+      list(response = pred_dt$response, se = pred_dt$se)
+    })
+
+    k = length(base_preds)
+    weights = rep(1 / k, k)  # cv_ensemble weights folds equally
+    response_list = mlr3misc::map(base_preds, "response")
+    expected_response = Reduce(`+`, response_list) / k
+    se_list = mlr3misc::map(base_preds, "se")
+    expected_se = mlr3pipelines:::aggregate_se_weighted(
+      response_list,
+      se_list,
+      weights = weights,
+      method = cfg$se_aggr,
+      rho = cfg$rho %??% 0
+    )
+
+    observed_response = result_task$data(rows = task$row_ids, cols = col_response)[[1]]
+    expect_equal(observed_response, expected_response)
+
+    if (is.null(expected_se)) {  # method "none" yields no SE, hence no se feature column
+      expect_false(col_se %in% result_task$feature_names)
+    } else {
+      expect_true(col_se %in% result_task$feature_names)
+      observed_se = result_task$data(rows = task$row_ids, cols = col_se)[[1]]
+      expect_equal(observed_se, expected_se)
+    }
+
+    learner_model = po$learner_model
+    expect_class(learner_model, "GraphLearner")
+    graph_pred = learner_model$predict(task)  # GraphLearner view must agree with the PipeOp output
+    expect_equal(graph_pred$response, expected_response)
+    if (is.null(expected_se)) {
+      expect_true(is.null(graph_pred$se) || all(is.na(graph_pred$se)))
+    } else {
+      expect_equal(graph_pred$se, expected_se)
+    }
+  }
+})
diff --git a/tests/testthat/test_pipeop_regravg.R b/tests/testthat/test_pipeop_regravg.R
new file mode 100644
index 000000000..e17e47276
--- /dev/null
+++ b/tests/testthat/test_pipeop_regravg.R
@@ -0,0 +1,162 @@
+context("PipeOpRegrAvg")
+
+predict_regravg = function(predictions, se_aggr, weights, se_aggr_rho = NULL) {  # helper: configure a regravg PipeOp and return its aggregated Prediction
+  po = po("regravg")
+  po$param_set$values$weights = weights
+  po$param_set$values$se_aggr = se_aggr
+  if (!is.null(se_aggr_rho)) {  # rho is only set when the caller provides it (relevant for se_aggr = "mean")
+    po$param_set$values$se_aggr_rho = se_aggr_rho
+  }
+  train_nulls = replicate(length(predictions), NULL, simplify = FALSE)  # regravg trains on NULLs, one slot per incoming prediction
+  po$train(train_nulls)
+  po$predict(predictions)[[1]]
+}
+
+test_that("PipeOpRegrAvg se aggregation methods return expected SE", {  # checks each SE aggregation mode against hand-computed closed forms
+  row_ids = 1:2
+  truth = c(0.5, -1.2)
+  responses = list(
+    c(1, 4),
+    c(2, 5),
+    c(6, 7)
+  )
+  ses = list(
+    c(0.2, 0.3),
+    c(0.4, 0.5),
+    c(0.6, 0.7)
+  )
+  weights = c(0.2, 0.3, 0.5)
+
+  make_predictions = function(responses, ses_list = NULL) {  # builds PredictionRegr objects, optionally carrying SE
+    lapply(seq_along(responses), function(i) {
+      args = list(
+        row_ids = row_ids,
+        truth = truth,
+        response = responses[[i]]
+      )
+      if (!is.null(ses_list)) {
+        args$se = ses_list[[i]]
+      }
+      do.call(PredictionRegr$new, args)
+    })
+  }
+
+  preds_with_se = make_predictions(responses, ses)
+  preds_without_se = make_predictions(responses, NULL)
+
+  response_matrix = do.call(cbind, responses)
+  expected_response = as.numeric(response_matrix %*% weights)  # weighted mean of member responses
+
+  weight_matrix = matrix(weights, nrow = length(row_ids), ncol = length(weights), byrow = TRUE)
+  between = rowSums((response_matrix^2) * weight_matrix) - expected_response^2  # between-model variance: E_w[x^2] - (E_w[x])^2
+  between = pmax(between, 0)  # guard against tiny negative values from floating point
+  expected_between = sqrt(between)
+
+  se_matrix = do.call(cbind, ses)
+  within = rowSums((se_matrix^2) * weight_matrix)  # weighted mean of per-model variances
+  within = pmax(within, 0)
+  expected_within = sqrt(within)
+  expected_predictive = sqrt(within + between)  # "predictive" = within-model variance plus between-model disagreement
+
+  weight_matrix_sq = matrix(weights^2, nrow = length(row_ids), ncol = length(weights), byrow = TRUE)
+  Sw = rowSums(se_matrix * weight_matrix)
+  S2w2 = rowSums((se_matrix^2) * weight_matrix_sq)
+  expected_mean_rho0 = sqrt(pmax(S2w2, 0))  # "mean" method: Var = (1 - rho) * sum(w^2 s^2) + rho * (sum(w s))^2
+  expected_mean_rho1 = sqrt(pmax(Sw^2, 0))
+  rho_min = -1 / (length(weights) - 1)  # smallest rho yielding a valid equicorrelated covariance among k members
+  var_rho_min = (1 - rho_min) * S2w2 + rho_min * (Sw^2)
+  expected_mean_rho_min = sqrt(pmax(var_rho_min, 0))
+
+  pred_none = predict_regravg(preds_with_se, "none", weights)
+  expect_equal(pred_none$response, expected_response)
+  expect_true(all(is.na(pred_none$se)))
+  expect_false("se" %in% names(pred_none$data))  # "none" must not store an se slot at all
+
+  pred_between = predict_regravg(preds_without_se, "between", weights)  # "between" needs no member SEs
+  expect_equal(pred_between$response, expected_response)
+  expect_equal(pred_between$se, expected_between)
+
+  pred_within = predict_regravg(preds_with_se, "within", weights)
+  expect_equal(pred_within$response, expected_response)
+  expect_equal(pred_within$se, expected_within)
+
+  pred_predictive = predict_regravg(preds_with_se, "predictive", weights)
+  expect_equal(pred_predictive$response, expected_response)
+  expect_equal(pred_predictive$se, expected_predictive)
+
+  pred_mean_indep = predict_regravg(preds_with_se, "mean", weights, se_aggr_rho = 0)
+  expect_equal(pred_mean_indep$response, expected_response)
+  expect_equal(pred_mean_indep$se, expected_mean_rho0)
+
+  pred_mean_full = predict_regravg(preds_with_se, "mean", weights, se_aggr_rho = 1)
+  expect_equal(pred_mean_full$response, expected_response)
+  expect_equal(pred_mean_full$se, expected_mean_rho1)
+
+  pred_mean_clamped = predict_regravg(preds_with_se, "mean", weights, se_aggr_rho = -1)  # rho = -1 is out of range and expected to clamp to rho_min
+  expect_equal(pred_mean_clamped$response, expected_response)
+  expect_equal(pred_mean_clamped$se, expected_mean_rho_min)
+})
+
+test_that("PipeOpRegrAvg se aggregation requiring SE errors when SE is missing", {  # SE-consuming methods must fail loudly without member SEs
+  responses = list(
+    c(1, 2),
+    c(3, 4)
+  )
+  weights = c(0.5, 0.5)
+  preds_without_se = lapply(responses, function(resp) {
+    PredictionRegr$new(row_ids = 1:2, truth = c(0, 0), response = resp)
+  })
+
+  expect_error(
+    predict_regravg(preds_without_se, "predictive", weights),
+    "requires `ses_list`"
+  )
+
+  expect_error(
+    predict_regravg(preds_without_se, "mean", weights),
+    "requires `ses_list`"
+  )
+
+  expect_error(
+    predict_regravg(preds_without_se, "within", weights),
+    "requires `ses_list`"
+  )
+})
+
+test_that("PipeOpRegrAvg se aggregation with single prediction behaves correctly", {
+ row_ids = 1:4
+ truth = c(0, 1, 2, 3)
+ response = c(1.1, 2.2, 3.3, 4.4)
+ se = c(0.5, 0.6, 0.7, 0.8)
+ single_pred_with_se = list(PredictionRegr$new(
+ row_ids = row_ids,
+ truth = truth,
+ response = response,
+ se = se
+ ))
+ single_pred_without_se = list(PredictionRegr$new(
+ row_ids = row_ids,
+ truth = truth,
+ response = response
+ ))
+
+ result_none = predict_regravg(single_pred_with_se, "none", weights = 1)
+ expect_equal(result_none$response, response)
+ expect_false("se" %in% names(result_none$data))
+
+ result_between = predict_regravg(single_pred_without_se, "between", weights = 1)
+ expect_equal(result_between$response, response)
+ expect_equal(result_between$se, rep(0, length(response)))
+
+ result_within = predict_regravg(single_pred_with_se, "within", weights = 1)
+ expect_equal(result_within$response, response)
+ expect_equal(result_within$se, se)
+
+ result_predictive = predict_regravg(single_pred_with_se, "predictive", weights = 1)
+ expect_equal(result_predictive$response, response)
+ expect_equal(result_predictive$se, se)
+
+ result_mean = predict_regravg(single_pred_with_se, "mean", weights = 1, se_aggr_rho = 0.25)
+ expect_equal(result_mean$response, response)
+ expect_equal(result_mean$se, se)
+})