.response} is created,
@@ -70,6 +72,11 @@ Training time, in seconds.
Errors logged during prediction.
\item \code{predict_time} :: \code{NULL} | \code{numeric(1)}
Prediction time, in seconds.
+\item \code{predict_method} :: \code{character(1)}\cr
+\code{"full"} when prediction uses a learner fitted on all training data, \code{"cv_ensemble"} when predictions are averaged over
+models trained on resampling folds.
+\item \code{cv_model_states} :: \code{NULL} | \code{list}\cr
+Present for \code{predict_method = "cv_ensemble"}. Contains the states of the learners trained on each resampling fold.
}
This state is given the class \code{"pipeop_learner_cv_state"}.
@@ -87,6 +94,20 @@ predictions with the model trained on all training data.
Number of cross validation folds. Initialized to 3. Only used for \code{resampling.method = "cv"}.
\item \code{keep_response} :: \code{logical(1)}\cr
Only effective during \code{"prob"} prediction: Whether to keep response values, if available. Initialized to \code{FALSE}.
+\item \code{resampling.predict_method} :: \code{character(1)}\cr
+Controls how predictions are produced after training. \code{"full"} (default) fits the wrapped learner on the entire training data.
+\code{"cv_ensemble"} reuses the models fitted during resampling and averages their predictions. This option currently supports
+classification and regression learners together with \code{resampling.method = "cv"}.
+\item \code{resampling.se_aggr} :: \code{character(1)}\cr
+Standard error aggregation used when \code{"cv_ensemble"} predictions are produced for regression learners with \code{predict_type}
+containing \code{"se"}. Shares the definitions with \code{\link{PipeOpRegrAvg}}, i.e. \code{"predictive"}, \code{"mean"}, \code{"within"}, \code{"between"}, \code{"none"}.
+Initialized to \code{"predictive"} (within-fold variance plus between-fold disagreement) when constructed with a \code{\link[mlr3:Learner]{Learner}} that has \code{predict_type = "se"};
+otherwise to \code{"none"}.\cr
+Only present for learners that support \code{"se"} predictions.
+\item \code{resampling.se_aggr_rho} :: \code{numeric(1)}\cr
+Equicorrelation parameter for \code{resampling.se_aggr = "mean"}, interpreted as in \code{\link{PipeOpRegrAvg}}. Ignored otherwise.
+Defaults to \code{0} when \code{resampling.se_aggr = "mean"}.\cr
+Only present for learners that support \code{"se"} predictions.
}
}
@@ -112,7 +133,7 @@ Methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
task = tsk("iris")
@@ -131,6 +152,7 @@ graph = gunion(list(
graph$train(task)
graph$pipeops$classif.rpart$learner$predict_type = "prob"
+graph$pipeops$classif.rpart$param_set$values$resampling.predict_method = "cv_ensemble"
graph$train(task)
\dontshow{\}) # examplesIf}
diff --git a/man/mlr_pipeops_learner_pi_cvplus.Rd b/man/mlr_pipeops_learner_pi_cvplus.Rd
index 188718a80..71c29e9b7 100644
--- a/man/mlr_pipeops_learner_pi_cvplus.Rd
+++ b/man/mlr_pipeops_learner_pi_cvplus.Rd
@@ -100,7 +100,7 @@ Methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
task = tsk("mtcars")
diff --git a/man/mlr_pipeops_nearmiss.Rd b/man/mlr_pipeops_nearmiss.Rd
index 8d4898a66..6340acf23 100644
--- a/man/mlr_pipeops_nearmiss.Rd
+++ b/man/mlr_pipeops_nearmiss.Rd
@@ -69,7 +69,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("themis")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("themis")) withAutoprint(\{ # examplesIf}
library("mlr3")
# Create example task
diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd
index 7f0cfc265..a6d3bd8f1 100644
--- a/man/mlr_pipeops_nmf.Rd
+++ b/man/mlr_pipeops_nmf.Rd
@@ -96,7 +96,7 @@ See \code{\link[NMF:nmf]{nmf()}}.
\section{Internals}{
-Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis-coef-methods]{basis()}}, \code{\link[NMF:basis-coef-methods]{coef()}} and
+Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis]{basis()}}, \code{\link[NMF:coef]{coef()}} and
\code{\link[MASS:ginv]{ginv()}}.
}
@@ -111,7 +111,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (mlr3misc::require_namespaces(c("NMF", "MASS"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (mlr3misc::require_namespaces(c("NMF", "MASS"), quietly = TRUE)) withAutoprint(\{ # examplesIf}
\dontshow{
# NMF attaches these packages to search path on load, #929
lapply(c("package:Biobase", "package:BiocGenerics", "package:generics"), detach, character.only = TRUE)
diff --git a/man/mlr_pipeops_ovrsplit.Rd b/man/mlr_pipeops_ovrsplit.Rd
index 062e086f8..3aa506282 100644
--- a/man/mlr_pipeops_ovrsplit.Rd
+++ b/man/mlr_pipeops_ovrsplit.Rd
@@ -81,7 +81,7 @@ Only methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library(mlr3)
task = tsk("iris")
po = po("ovrsplit")
diff --git a/man/mlr_pipeops_ovrunite.Rd b/man/mlr_pipeops_ovrunite.Rd
index 3ce033902..ba513e4ee 100644
--- a/man/mlr_pipeops_ovrunite.Rd
+++ b/man/mlr_pipeops_ovrunite.Rd
@@ -74,7 +74,7 @@ Only methods inherited from \code{\link{PipeOpEnsemble}}/\code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library(mlr3)
task = tsk("iris")
gr = po("ovrsplit") \%>>\% lrn("classif.rpart") \%>>\% po("ovrunite")
diff --git a/man/mlr_pipeops_proxy.Rd b/man/mlr_pipeops_proxy.Rd
index d1b92cf65..b472ba83d 100644
--- a/man/mlr_pipeops_proxy.Rd
+++ b/man/mlr_pipeops_proxy.Rd
@@ -74,7 +74,7 @@ Only methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
set.seed(1234)
diff --git a/man/mlr_pipeops_randomresponse.Rd b/man/mlr_pipeops_randomresponse.Rd
index e3331d268..570d9e20f 100644
--- a/man/mlr_pipeops_randomresponse.Rd
+++ b/man/mlr_pipeops_randomresponse.Rd
@@ -79,7 +79,7 @@ Only methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library(mlr3)
library(mlr3learners)
diff --git a/man/mlr_pipeops_regravg.Rd b/man/mlr_pipeops_regravg.Rd
index 76fac8eb0..6e8542ddf 100644
--- a/man/mlr_pipeops_regravg.Rd
+++ b/man/mlr_pipeops_regravg.Rd
@@ -12,12 +12,80 @@ Perform (weighted) prediction averaging from regression \code{\link[mlr3:Predict
\code{\link{PipeOpRegrAvg}} to multiple \code{\link{PipeOpLearner}} outputs.
The resulting \code{"response"} prediction is a weighted average of the incoming \code{"response"} predictions.
-\code{"se"} prediction is currently not aggregated but discarded if present.
+Aggregation of \code{"se"} predictions is controlled by the \code{se_aggr} parameter (see below). When \code{"se"} is not requested
+or \code{se_aggr = "none"}, \code{"se"} is dropped.
+}
+\section{"se" Aggregation}{
+
+
+Let there be \code{K} incoming predictions with weights \code{w} (sum to 1). For a given row \code{j}, denote
+per-model means \code{mu_i[j]} and, if available, per-model standard errors \code{se_i[j]}.
+Define
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{mu_bar[j] = sum_i w[i] * mu_i[j]
+var_between[j] = sum_i w[i] * (mu_i[j] - mu_bar[j])^2 # weighted var of means
+var_within[j] = sum_i w[i] * se_i[j]^2 # weighted mean of SE^2s
+}\if{html}{\out{</div>}}
+
+The following aggregation methods are available:
+\itemize{
+\item \strong{\code{se_aggr = "predictive"}} -- \emph{Within + Between (mixture/predictive SD)}
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{se[j] = sqrt(var_within[j] + var_between[j])
+}\if{html}{\out{</div>}}
+
+\strong{Interpretation.} Treats each incoming \code{se_i} as that model's predictive SD at the point (or, if the learner
+reports SE of the conditional mean--as many \code{mlr3} regression learners do--then as that mean-SE). The returned \code{se}
+is the SD of the \emph{mixture ensemble} under weighted averaging: it increases when base models disagree (epistemic spread)
+and when individual models are uncertain (aleatoric spread).
+\strong{Notes.} If \code{se_i} represents \emph{mean} SE (common in \code{predict.lm(se.fit=TRUE)}-style learners), the result
+aggregates those mean-SEs and still adds model disagreement correctly, but it will \emph{underestimate} a true predictive SD
+that would additionally include irreducible noise. Requires \code{"se"} to be present from \strong{all} inputs.
+\item \strong{\code{se_aggr = "mean"}} -- \emph{SE of the weighted average of means under equicorrelation}
+With a correlation parameter \code{se_aggr_rho = rho}, assume
+\code{Cov(mu_i_hat, mu_j_hat) = rho * se_i * se_j} for all \code{i != j}. Then
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{# components:
+a[j] = sum_i (w[i]^2 * se_i[j]^2)
+b[j] = (sum_i w[i] * se_i[j])^2
+var_mean[j] = (1 - rho) * a[j] + rho * b[j]
+se[j] = sqrt(var_mean[j])
+}\if{html}{\out{</div>}}
+
+\strong{Interpretation.} Returns the \emph{standard error of the averaged estimator} \verb{sum_i w[i] * mu_i}, not a predictive SD.
+Use when you specifically care about uncertainty of the averaged mean itself.
+\strong{Notes.} \code{rho} is clamped to the PSD range \verb{[-1/(K-1), 1]} for \code{K > 1}. Typical settings:
+\code{rho = 0} (assume independence; often optimistic for CV/bagging) and \code{rho = 1} (perfect correlation; conservative and
+equal to the weighted arithmetic mean of SEs). Requires \code{"se"} from \strong{all} inputs.
+\item \strong{\code{se_aggr = "within"}} -- \emph{Within-model component only}
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{se[j] = sqrt(var_within[j])
+}\if{html}{\out{</div>}}
+
+\strong{Interpretation.} Aggregates only the average per-model uncertainty and \strong{ignores} disagreement between models.
+Useful as a diagnostic of the aleatoric component; not a full ensemble uncertainty.
+\strong{Notes.} Typically \emph{underestimates} the uncertainty of the ensemble prediction when models disagree.
+Requires \code{"se"} from \strong{all} inputs.
+\item \strong{\code{se_aggr = "between"}} -- \emph{Between-model component only (works without \code{"se"})}
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{se[j] = sqrt(var_between[j])
+}\if{html}{\out{</div>}}
+
+\strong{Interpretation.} Captures only the spread of the base means (epistemic/model disagreement).
+\strong{Notes.} This is the only method that does not use incoming \code{"se"}. It is a \emph{lower bound} on a full predictive SD,
+because it omits within-model noise.
+\item \strong{\code{se_aggr = "none"}} -- \emph{Do not return \code{"se"}}
+\code{"se"} is dropped from the output prediction.
+}
+
+\strong{Relationships and edge cases.} For any row, \code{se("predictive") >= max(se("within"), se("between"))}.
+With a single input (\code{K = 1}), \code{"predictive"} and \code{"within"} return the input \code{"se"}, \code{"between"} returns \code{0}.
+Methods \code{"predictive"}, \code{"mean"}, and \code{"within"} require all inputs to provide \code{"se"}; otherwise aggregation errors.
Weights can be set as a parameter; if none are provided, defaults to
equal weights for each prediction.
-Defaults to equal weights for each model.
}
+
\section{Construction}{
@@ -51,7 +119,15 @@ The \verb{$state} is left empty (\code{list()}).
\section{Parameters}{
-The parameters are the parameters inherited from the \code{\link{PipeOpEnsemble}}.
+The parameters are the parameters inherited from the \code{\link{PipeOpEnsemble}}, as well as:
+\itemize{
+\item \code{se_aggr} :: \code{character(1)}\cr
+Controls how incoming \code{"se"} values are aggregated into an ensemble \code{"se"}. One of
+\code{"predictive"}, \code{"mean"}, \code{"within"}, \code{"between"}, \code{"none"}. See the description above for definitions and interpretation.
+\item \code{se_aggr_rho} :: \code{numeric(1)}\cr
+Equicorrelation parameter used only for \code{se_aggr = "mean"}. Interpreted as the common correlation between
+per-model mean estimators. Recommended range \verb{[0, 1]}; values are clamped to \verb{[-1/(K-1), 1]} for validity.
+}
}
\section{Internals}{
@@ -70,18 +146,18 @@ Only methods inherited from \code{\link{PipeOpEnsemble}}/\code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
-# Simple Bagging
+# Simple Bagging for Regression
gr = ppl("greplicate",
po("subsample") \%>>\%
- po("learner", lrn("classif.rpart")),
+ po("learner", lrn("regr.rpart")),
n = 5
) \%>>\%
- po("classifavg")
+ po("regravg")
-resample(tsk("iris"), GraphLearner$new(gr), rsmp("holdout"))
+resample(tsk("mtcars"), GraphLearner$new(gr), rsmp("holdout"))
\dontshow{\}) # examplesIf}
}
\seealso{
diff --git a/man/mlr_pipeops_smote.Rd b/man/mlr_pipeops_smote.Rd
index 0bfd310db..412e0bed5 100644
--- a/man/mlr_pipeops_smote.Rd
+++ b/man/mlr_pipeops_smote.Rd
@@ -69,7 +69,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("smotefamily")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("smotefamily")) withAutoprint(\{ # examplesIf}
library("mlr3")
# Create example task
diff --git a/man/mlr_pipeops_smotenc.Rd b/man/mlr_pipeops_smotenc.Rd
index 5a231bb2a..b9dfa69a9 100644
--- a/man/mlr_pipeops_smotenc.Rd
+++ b/man/mlr_pipeops_smotenc.Rd
@@ -78,7 +78,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("themis")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("themis")) withAutoprint(\{ # examplesIf}
library("mlr3")
# Create example task
diff --git a/man/mlr_pipeops_targetmutate.Rd b/man/mlr_pipeops_targetmutate.Rd
index b348d1ce3..eb1d10b11 100644
--- a/man/mlr_pipeops_targetmutate.Rd
+++ b/man/mlr_pipeops_targetmutate.Rd
@@ -78,7 +78,7 @@ Only methods inherited from \code{\link{PipeOpTargetTrafo}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library(mlr3)
task = tsk("boston_housing")
po = PipeOpTargetMutate$new("logtrafo", param_vals = list(
diff --git a/man/mlr_pipeops_targettrafoscalerange.Rd b/man/mlr_pipeops_targettrafoscalerange.Rd
index 8400551c5..9f4b95778 100644
--- a/man/mlr_pipeops_targettrafoscalerange.Rd
+++ b/man/mlr_pipeops_targettrafoscalerange.Rd
@@ -66,7 +66,7 @@ Only methods inherited from \code{\link{PipeOpTargetTrafo}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library(mlr3)
task = tsk("boston_housing")
po = PipeOpTargetTrafoScaleRange$new()
diff --git a/man/mlr_pipeops_textvectorizer.Rd b/man/mlr_pipeops_textvectorizer.Rd
index d40503694..726573712 100644
--- a/man/mlr_pipeops_textvectorizer.Rd
+++ b/man/mlr_pipeops_textvectorizer.Rd
@@ -167,7 +167,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (mlr3misc::require_namespaces(c("stopwords", "quanteda"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (mlr3misc::require_namespaces(c("stopwords", "quanteda"), quietly = TRUE)) withAutoprint(\{ # examplesIf}
library("mlr3")
library("data.table")
# create some text data
diff --git a/man/mlr_pipeops_threshold.Rd b/man/mlr_pipeops_threshold.Rd
index d8aa2fa5c..65ef1fd9e 100644
--- a/man/mlr_pipeops_threshold.Rd
+++ b/man/mlr_pipeops_threshold.Rd
@@ -67,7 +67,7 @@ Only methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
t = tsk("german_credit")
gr = po(lrn("classif.rpart", predict_type = "prob")) \%>>\%
diff --git a/man/mlr_pipeops_tomek.Rd b/man/mlr_pipeops_tomek.Rd
index 7a3bee4bd..7fa699d8c 100644
--- a/man/mlr_pipeops_tomek.Rd
+++ b/man/mlr_pipeops_tomek.Rd
@@ -61,7 +61,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("themis")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("themis")) withAutoprint(\{ # examplesIf}
library("mlr3")
# Create example task
diff --git a/man/mlr_pipeops_tunethreshold.Rd b/man/mlr_pipeops_tunethreshold.Rd
index f2707ef05..3ffd5e43f 100644
--- a/man/mlr_pipeops_tunethreshold.Rd
+++ b/man/mlr_pipeops_tunethreshold.Rd
@@ -89,7 +89,7 @@ Only methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (mlr3misc::require_namespaces(c("bbotk", "rpart", "GenSA"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (mlr3misc::require_namespaces(c("bbotk", "rpart", "GenSA"), quietly = TRUE)) withAutoprint(\{ # examplesIf}
library("mlr3")
task = tsk("iris")
diff --git a/man/mlr_pipeops_updatetarget.Rd b/man/mlr_pipeops_updatetarget.Rd
index 263b41ff3..434e3ec96 100644
--- a/man/mlr_pipeops_updatetarget.Rd
+++ b/man/mlr_pipeops_updatetarget.Rd
@@ -75,7 +75,7 @@ Only methods inherited from \code{\link{PipeOp}}.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
\dontrun{
# Create a binary class task from iris
library(mlr3)
diff --git a/man/mlr_pipeops_vtreat.Rd b/man/mlr_pipeops_vtreat.Rd
index 81514f09d..110fe7084 100644
--- a/man/mlr_pipeops_vtreat.Rd
+++ b/man/mlr_pipeops_vtreat.Rd
@@ -128,7 +128,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("vtreat")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("vtreat")) withAutoprint(\{ # examplesIf}
library("mlr3")
set.seed(2020)
diff --git a/man/mlr_pipeops_yeojohnson.Rd b/man/mlr_pipeops_yeojohnson.Rd
index f1823e343..1293846c7 100644
--- a/man/mlr_pipeops_yeojohnson.Rd
+++ b/man/mlr_pipeops_yeojohnson.Rd
@@ -73,7 +73,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
}
\examples{
-\dontshow{if (requireNamespace("bestNormalize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("bestNormalize")) withAutoprint(\{ # examplesIf}
library("mlr3")
task = tsk("iris")
diff --git a/man/po.Rd b/man/po.Rd
index cdb941a25..8b1b13562 100644
--- a/man/po.Rd
+++ b/man/po.Rd
@@ -48,7 +48,7 @@ it to a \code{\link{PipeOp}}. \code{pos()} (with plural-s) takes either a \code{
list of objects, and creates a \code{list} of \code{\link{PipeOp}}s.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
po("learner", lrn("classif.rpart"), cp = 0.3)
diff --git a/man/ppl.Rd b/man/ppl.Rd
index 190eef7d1..77ec8d1c4 100644
--- a/man/ppl.Rd
+++ b/man/ppl.Rd
@@ -32,7 +32,7 @@ Creates a \code{\link{Graph}} from \code{\link{mlr_graphs}} from given ID
vector of any list and returns a \code{list} of possibly muliple \code{\link{Graph}}s.
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
gr = ppl("bagging", graph = po(lrn("regr.rpart")),
diff --git a/man/preproc.Rd b/man/preproc.Rd
index 2b6340637..7fa3b3e43 100644
--- a/man/preproc.Rd
+++ b/man/preproc.Rd
@@ -56,7 +56,7 @@ of \code{\link[mlr3:TaskSupervised]{TaskSupervised}} will not work with these in
}
\examples{
-\dontshow{if (requireNamespace("rpart")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (requireNamespace("rpart")) withAutoprint(\{ # examplesIf}
library("mlr3")
task = tsk("iris")
diff --git a/tests/testthat/test_pipeop_classifavg.R b/tests/testthat/test_pipeop_classifavg.R
new file mode 100644
index 000000000..feb3a07af
--- /dev/null
+++ b/tests/testthat/test_pipeop_classifavg.R
@@ -0,0 +1,127 @@
+context("PipeOpClassifAvg")
+
+predict_classifavg = function(predictions, prob_aggr = "mean", weights = 1, prob_aggr_eps = 1e-12) {
+ po = po("classifavg")
+ po$param_set$values$weights = weights
+ po$param_set$values$prob_aggr = prob_aggr
+ if (identical(prob_aggr, "log")) {
+ po$param_set$values$prob_aggr_eps = prob_aggr_eps
+ }
+ train_nulls = replicate(length(predictions), NULL, simplify = FALSE)
+ po$train(train_nulls)
+ po$predict(predictions)[[1]]
+}
+
+test_that("PipeOpClassifAvg probability aggregation methods return expected probabilities", {
+ row_ids = 1:3
+ lvls = c("c0", "c1", "c2")
+ truth = factor(c("c0", "c1", "c2"), levels = lvls)
+
+ prob_mats = list(
+ matrix(c(
+ 0.7, 0.2, 0.1,
+ 0.3, 0.4, 0.3,
+ 0.2, 0.3, 0.5
+ ), ncol = length(lvls), byrow = TRUE, dimnames = list(NULL, lvls)),
+ matrix(c(
+ 0.6, 0.1, 0.3,
+ 0.4, 0.3, 0.3,
+ 0.25, 0.25, 0.5
+ ), ncol = length(lvls), byrow = TRUE, dimnames = list(NULL, lvls)),
+ matrix(c(
+ 0.5, 0.3, 0.2,
+ 0.2, 0.5, 0.3,
+ 0.3, 0.2, 0.5
+ ), ncol = length(lvls), byrow = TRUE, dimnames = list(NULL, lvls))
+ )
+
+ predictions = lapply(prob_mats, function(prob) {
+ PredictionClassif$new(row_ids = row_ids, truth = truth, prob = prob)
+ })
+ weights = c(0.2, 0.3, 0.5)
+
+ pred_mean = predict_classifavg(predictions, prob_aggr = "mean", weights = weights)
+ expected_mean = Reduce(`+`, Map(function(prob, w) prob * w, prob_mats, weights))
+ expect_equal(pred_mean$prob, expected_mean, tolerance = 1e-8)
+ expected_response_mean = factor(colnames(expected_mean)[max.col(expected_mean, ties.method = "first")], levels = lvls)
+ expect_equal(pred_mean$response, expected_response_mean)
+
+ pred_log = predict_classifavg(predictions, prob_aggr = "log", weights = weights)
+ expected_log = mlr3pipelines:::weighted_matrix_logpool(prob_mats, weights, epsilon = 1e-12)
+ expect_equal(pred_log$prob, expected_log, tolerance = 1e-8)
+ expected_response_log = factor(colnames(expected_log)[max.col(expected_log, ties.method = "first")], levels = lvls)
+ expect_equal(pred_log$response, expected_response_log)
+})
+
+test_that("PipeOpClassifAvg single prediction returns input probabilities for mean and log", {
+ row_ids = 1:4
+ lvls = c("yes", "no")
+ truth = factor(c("yes", "no", "yes", "no"), levels = lvls)
+ single_prob = matrix(c(
+ 0.8, 0.2,
+ 0.1, 0.9,
+ 0.6, 0.4,
+ 0.3, 0.7
+ ), ncol = length(lvls), byrow = TRUE, dimnames = list(NULL, lvls))
+
+ prediction = list(PredictionClassif$new(row_ids = row_ids, truth = truth, prob = single_prob))
+
+ result_mean = predict_classifavg(prediction, prob_aggr = "mean", weights = 1)
+ expect_equal(result_mean$prob, single_prob, tolerance = 1e-10)
+
+ result_log = predict_classifavg(prediction, prob_aggr = "log", weights = 1)
+ expect_equal(result_log$prob, single_prob, tolerance = 1e-10)
+})
+
+test_that("PipeOpClassifAvg aggregates factor responses when probabilities are missing", {
+ row_ids = 1:5
+ lvls = c("a", "b")
+ truth = factor(rep("a", length(row_ids)), levels = lvls)
+ responses = list(
+ factor(c("a", "a", "b", "a", "b"), levels = lvls),
+ factor(c("b", "a", "b", "b", "b"), levels = lvls),
+ factor(c("a", "b", "a", "a", "b"), levels = lvls)
+ )
+ predictions = lapply(responses, function(resp) {
+ PredictionClassif$new(row_ids = row_ids, truth = truth, response = resp)
+ })
+ weights = c(0.5, 0.3, 0.2)
+
+ result = predict_classifavg(predictions, prob_aggr = "log", weights = weights)
+ expected_freq = mlr3pipelines:::weighted_factor_mean(responses, weights, lvls)
+ expect_equal(result$prob, expected_freq)
+ expected_response = factor(lvls[max.col(expected_freq, ties.method = "first")], levels = lvls)
+ expect_equal(result$response, expected_response)
+})
+
+test_that("PipeOpClassifAvg log aggregation handles zeros with epsilon", {
+ row_ids = 1
+ lvls = c("a", "b")
+ truth = factor("a", levels = lvls)
+ prob_list = list(
+ matrix(c(0, 1), ncol = length(lvls), dimnames = list(NULL, lvls)),
+ matrix(c(0.5, 0.5), ncol = length(lvls), dimnames = list(NULL, lvls)),
+ matrix(c(0.5, 0.5), ncol = length(lvls), dimnames = list(NULL, lvls))
+ )
+ predictions = lapply(prob_list, function(prob) {
+ PredictionClassif$new(row_ids = row_ids, truth = truth, prob = prob)
+ })
+
+ po = po("classifavg")
+ po$param_set$values$weights = rep(1 / length(predictions), length(predictions))
+ po$param_set$values$prob_aggr = "log"
+ po$param_set$values$prob_aggr_eps = 1e-12
+
+ po$train(replicate(length(predictions), NULL, simplify = FALSE))
+ result_eps = po$predict(predictions)[[1]]
+ expect_true(all(is.finite(result_eps$prob)))
+ expected_eps = mlr3pipelines:::weighted_matrix_logpool(prob_list, po$param_set$values$weights, epsilon = 1e-12)
+ expect_equal(result_eps$prob, expected_eps, tolerance = 1e-10)
+
+ po$param_set$values$prob_aggr_eps = 0
+ po$train(replicate(length(predictions), NULL, simplify = FALSE))
+ result_zero = po$predict(predictions)[[1]]
+ expected_zero = mlr3pipelines:::weighted_matrix_logpool(prob_list, po$param_set$values$weights, epsilon = 0)
+ expect_equal(result_zero$prob, expected_zero)
+ expect_equal(as.numeric(result_zero$prob[1, ]), c(0, 1))
+})
diff --git a/tests/testthat/test_pipeop_learnercv.R b/tests/testthat/test_pipeop_learnercv.R
index 9d6069f65..a91c5731e 100644
--- a/tests/testthat/test_pipeop_learnercv.R
+++ b/tests/testthat/test_pipeop_learnercv.R
@@ -42,12 +42,410 @@ test_that("PipeOpLearnerCV - param values", {
skip_if_not_installed("rpart")
lrn = mlr_learners$get("classif.rpart")
polrn = PipeOpLearnerCV$new(lrn)
- expect_subset(c("minsplit", "resampling.method", "resampling.folds"), polrn$param_set$ids())
- expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 3, resampling.keep_response = FALSE, xval = 0))
+ expect_true(all(c(
+ "minsplit",
+ "resampling.method",
+ "resampling.folds",
+ "resampling.predict_method",
+ "resampling.prob_aggr",
+ "resampling.prob_aggr_eps"
+ ) %in% polrn$param_set$ids()))
+ expect_false(any(c("resampling.se_aggr", "resampling.se_aggr_rho") %in% polrn$param_set$ids()))
+ expect_equal(polrn$param_set$values$resampling.method, "cv")
+ expect_equal(polrn$param_set$values$resampling.folds, 3)
+ expect_false(polrn$param_set$values$resampling.keep_response)
+ expect_equal(polrn$param_set$values$resampling.predict_method, "full")
+ expect_equal(polrn$param_set$values$resampling.prob_aggr, "mean")
+ expect_null(polrn$param_set$values$resampling.prob_aggr_eps)
+ expect_equal(polrn$param_set$values$xval, 0)
polrn$param_set$values$minsplit = 2
- expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 3, resampling.keep_response = FALSE, minsplit = 2, xval = 0))
+ expect_equal(polrn$param_set$values$minsplit, 2)
+ expect_equal(polrn$param_set$values$resampling.prob_aggr, "mean")
+ expect_null(polrn$param_set$values$resampling.prob_aggr_eps)
polrn$param_set$values$resampling.folds = 4
- expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 4, resampling.keep_response = FALSE, minsplit = 2, xval = 0))
+ expect_equal(polrn$param_set$values$resampling.folds, 4)
+ expect_equal(polrn$param_set$values$minsplit, 2)
+})
+
+test_that("PipeOpLearnerCV se aggregation default matches learner predict_type", {  # se_aggr params exist/default depending on the wrapped learner's SE support
+  learner_resp = LearnerRegrDebug$new()
+  learner_resp$predict_type = "response"
+  po_resp = PipeOpLearnerCV$new(learner_resp)
+  expect_true("resampling.se_aggr" %in% po_resp$param_set$ids())
+  expect_identical(po_resp$param_set$values$resampling.se_aggr, "none")  # SE-capable learner currently predicting "response" -> default "none"
+
+  learner_se = LearnerRegrDebug$new()
+  learner_se$predict_type = "se"
+  po_se = PipeOpLearnerCV$new(learner_se)
+  expect_true(all(c("resampling.se_aggr", "resampling.se_aggr_rho") %in% po_se$param_set$ids()))
+  expect_identical(po_se$param_set$values$resampling.se_aggr, "predictive")  # documented default when constructed with predict_type = "se"
+
+  learner_no_se = lrn("regr.rpart")  # rpart cannot predict SE, so the params must be absent entirely
+  po_no_se = PipeOpLearnerCV$new(learner_no_se)
+  expect_false(any(c("resampling.se_aggr", "resampling.se_aggr_rho") %in% po_no_se$param_set$ids()))
+})
+
+test_that("PipeOpLearnerCV - cv ensemble averages fold learners", {  # predict output must equal the plain mean of per-fold model predictions
+  skip_if_not_installed("rpart")
+  task = tsk("iris")
+  learner = lrn("classif.rpart", predict_type = "prob")
+  po = PipeOpLearnerCV$new(learner,
+    param_vals = list(
+      resampling.folds = 2,
+      resampling.keep_response = TRUE,
+      resampling.predict_method = "cv_ensemble"
+    )
+  )
+
+  trained_task = po$train(list(task))[[1]]
+  expect_setequal(trained_task$feature_names, c(
+    sprintf("%s.response", po$id),
+    paste0(po$id, ".prob.", task$class_names)
+  ))
+  expect_equal(po$state$predict_method, "cv_ensemble")
+  expect_length(po$state$cv_model_states, 2)  # one stored model state per fold
+
+  result_task = po$predict(list(task))[[1]]
+  prob_feature_names = paste0(po$id, ".prob.", task$class_names)
+
+  pred_probs = as.matrix(result_task$data(rows = task$row_ids, cols = prob_feature_names))
+  manual_probs = mlr3misc::map(po$state$cv_model_states, function(state) {  # re-predict with each stored fold state and average by hand
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    dt = as.data.table(clone$predict(task))
+    data.table::setorder(dt, row_ids)
+    as.matrix(dt[, paste0("prob.", task$class_names), with = FALSE])
+  })
+  manual_prob = Reduce(`+`, manual_probs) / length(manual_probs)
+  colnames(manual_prob) = prob_feature_names
+  expect_equal(pred_probs, manual_prob)
+
+  result_response = result_task$data(rows = task$row_ids, cols = sprintf("%s.response", po$id))[[1]]
+  expect_equal(
+    as.character(result_response),
+    task$class_names[max.col(manual_prob)]  # response = argmax of the averaged probabilities
+  )
+})
+
+test_that("PipeOpLearnerCV - cv ensemble drops response when requested", {  # keep_response defaults to FALSE, so only prob columns survive
+  skip_if_not_installed("rpart")
+  task = tsk("iris")
+  learner = lrn("classif.rpart", predict_type = "prob")
+  po = PipeOpLearnerCV$new(learner,
+    param_vals = list(
+      resampling.predict_method = "cv_ensemble"
+    )
+  )
+  po$train(list(task))
+  result_task = po$predict(list(task))[[1]]
+  expect_true(all(sprintf("%s.prob.%s", po$id, task$class_names) %in% result_task$feature_names))
+  expect_false(any(sprintf("%s.response", po$id) %in% result_task$feature_names))
+})
+
+test_that("PipeOpLearnerCV - cv ensemble averages classif responses", {  # with predict_type "response", folds vote; response = majority class
+  skip_if_not_installed("rpart")
+  task = tsk("iris")
+  learner = lrn("classif.rpart", predict_type = "response")
+  po = PipeOpLearnerCV$new(learner,
+    param_vals = list(resampling.predict_method = "cv_ensemble")
+  )
+  po$train(list(task))
+  expect_equal(po$state$predict_method, "cv_ensemble")
+  expect_true(length(po$state$cv_model_states) > 1)
+
+  result_task = po$predict(list(task))[[1]]
+  response_feature = sprintf("%s.response", po$id)
+  expect_setequal(result_task$feature_names, response_feature)
+
+  manual_responses = mlr3misc::map(po$state$cv_model_states, function(state) {  # per-fold hard predictions, row-order normalized
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    pred_dt = as.data.table(clone$predict(task))
+    data.table::setorderv(pred_dt, "row_ids")
+    as.character(pred_dt$response)
+  })
+
+  manual_matrix = as.matrix(do.call(cbind, manual_responses))
+  n = nrow(manual_matrix)
+  prob_matrix = vapply(task$class_names, function(cls) rowMeans(manual_matrix == cls), numeric(n))  # vote frequencies per class
+  if (!is.matrix(prob_matrix)) {  # vapply drops to a vector when n == 1
+    prob_matrix = matrix(prob_matrix, ncol = length(task$class_names))
+  }
+  colnames(prob_matrix) = task$class_names
+  manual_response = task$class_names[max.col(prob_matrix, ties.method = "first")]
+  manual_response = factor(manual_response, levels = task$class_names)
+
+  observed_response = result_task$data(rows = task$row_ids, cols = response_feature)[[1]]
+  expect_equal(as.character(observed_response), as.character(manual_response))
+
+  learner_prediction = po$learner_model$predict(task)  # the exported GraphLearner must agree with the PipeOp output
+  expect_equal(as.character(learner_prediction$response), as.character(manual_response))
+  pred_dt = as.data.table(learner_prediction)
+  data.table::setorderv(pred_dt, "row_ids")
+  graph_prob = as.matrix(pred_dt[, paste0("prob.", task$class_names), with = FALSE])
+  colnames(graph_prob) = task$class_names
+  expect_equal(graph_prob, prob_matrix)
+})
+
+test_that("PipeOpLearnerCV - cv ensemble log prob aggregation", {  # prob_aggr = "log" must match weighted_matrix_logpool on per-fold probs
+  skip_if_not_installed("rpart")
+  task = tsk("iris")
+  learner = lrn("classif.rpart", predict_type = "prob")
+  param_vals = list(
+    resampling.folds = 3,
+    resampling.keep_response = TRUE,
+    resampling.predict_method = "cv_ensemble",
+    resampling.prob_aggr = "log",
+    resampling.prob_aggr_eps = 1e-8
+  )
+  po = PipeOpLearnerCV$new(learner, param_vals = param_vals)
+
+  trained_task = po$train(list(task))[[1]]
+  prob_cols = paste0(po$id, ".prob.", task$class_names)
+  expect_true(all(prob_cols %in% trained_task$feature_names))
+
+  result_task = po$predict(list(task))[[1]]
+  result_probs = as.matrix(result_task$data(rows = task$row_ids, cols = prob_cols))
+  manual_probs = mlr3misc::map(po$state$cv_model_states, function(state) {
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    dt = as.data.table(clone$predict(task))
+    data.table::setorder(dt, row_ids)
+    as.matrix(dt[, paste0("prob.", task$class_names), with = FALSE])
+  })
+  weights = rep(1 / length(manual_probs), length(manual_probs))  # cv_ensemble weights folds equally
+  expected_probs = mlr3pipelines:::weighted_matrix_logpool(manual_probs, weights, epsilon = param_vals$resampling.prob_aggr_eps)
+  colnames(expected_probs) = prob_cols
+  expect_equal(result_probs, expected_probs, tolerance = 1e-8)
+
+  response_col = sprintf("%s.response", po$id)
+  observed_response = result_task$data(rows = task$row_ids, cols = response_col)[[1]]
+  expected_response = factor(task$class_names[max.col(expected_probs, ties.method = "first")], levels = task$class_names)
+  expect_equal(as.character(observed_response), as.character(expected_response))
+
+  graph_prediction = po$learner_model$predict(task)  # GraphLearner view must reproduce the same pooled probabilities
+  graph_dt = as.data.table(graph_prediction)
+  data.table::setorder(graph_dt, row_ids)
+  graph_probs = as.matrix(graph_dt[, paste0("prob.", task$class_names), with = FALSE])
+  colnames(graph_probs) = prob_cols
+  expect_equal(graph_probs, expected_probs, tolerance = 1e-8)
+  expect_equal(as.character(graph_dt$response), as.character(expected_response))
+})
+
+test_that("PipeOpLearnerCV - log aggregation with zeros uses epsilon", {  # featureless on a 2-row task yields degenerate fold probs; pooling must not NaN
+  backend = data.table::data.table(
+    x = 1:2,
+    y = factor(c("a", "b"), levels = c("a", "b"))
+  )
+  task = TaskClassif$new("two_point", backend = backend, target = "y")
+  learner = lrn("classif.featureless", predict_type = "prob")
+  po = PipeOpLearnerCV$new(learner, param_vals = list(
+    resampling.method = "cv",
+    resampling.folds = 2,
+    resampling.predict_method = "cv_ensemble",
+    resampling.prob_aggr = "log",
+    resampling.prob_aggr_eps = 1e-8
+  ))
+
+  po$train(list(task))
+  result_task = po$predict(list(task))[[1]]
+  prob_cols = paste0(po$id, ".prob.", task$class_names)
+  probs = as.matrix(result_task$data(rows = task$row_ids, cols = prob_cols))
+  expect_false(any(is.nan(probs)))
+  expect_equal(
+    unname(probs),
+    matrix(rep(0.5, length(task$row_ids) * length(task$class_names)),  # opposing one-hot folds pool back to uniform 0.5/0.5
+      ncol = length(task$class_names), byrow = TRUE
+    )
+  )
+})
+
+test_that("PipeOpLearnerCV - log aggregation epsilon controls shrinkage", {  # result must track weighted_matrix_logpool for both eps > 0 and eps = 0
+  backend = data.table::data.table(
+    x = 1:3,
+    y = factor(c("a", "b", "b"), levels = c("a", "b"))
+  )
+  task = TaskClassif$new("three_point", backend = backend, target = "y")
+  learner = lrn("classif.featureless", predict_type = "prob")
+
+  po = PipeOpLearnerCV$new(learner, param_vals = list(
+    resampling.method = "cv",
+    resampling.folds = 3,
+    resampling.predict_method = "cv_ensemble",
+    resampling.prob_aggr = "log",
+    resampling.prob_aggr_eps = 1e-12
+  ))
+  po$train(list(task))
+  result_task = po$predict(list(task))[[1]]
+
+  manual_probs = mlr3misc::map(po$state$cv_model_states, function(state) {
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    dt = as.data.table(clone$predict(task))
+    data.table::setorder(dt, row_ids)
+    as.matrix(dt[, paste0("prob.", task$class_names), with = FALSE])
+  })
+  weights = rep(1 / length(manual_probs), length(manual_probs))
+  expected_eps = mlr3pipelines:::weighted_matrix_logpool(
+    manual_probs, weights, epsilon = po$param_set$values$resampling.prob_aggr_eps
+  )
+  prob_cols = paste0(po$id, ".prob.", task$class_names)
+  observed_eps = as.matrix(result_task$data(rows = task$row_ids, cols = prob_cols))
+  expect_false(any(is.nan(observed_eps)))
+  expect_equal(unname(observed_eps), unname(expected_eps), tolerance = 1e-10)
+
+  po$param_set$values$resampling.prob_aggr_eps = 0  # retrain with smoothing off: zero probs must now dominate the pool
+  po$train(list(task))
+  result_zero = po$predict(list(task))[[1]]
+  manual_probs_zero = mlr3misc::map(po$state$cv_model_states, function(state) {
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    dt = as.data.table(clone$predict(task))
+    data.table::setorder(dt, row_ids)
+    as.matrix(dt[, paste0("prob.", task$class_names), with = FALSE])
+  })
+  weights_zero = rep(1 / length(manual_probs_zero), length(manual_probs_zero))
+  manual_zero = mlr3pipelines:::weighted_matrix_logpool(manual_probs_zero, weights_zero, epsilon = 0)
+  colnames(manual_zero) = paste0(po$id, ".prob.", task$class_names)
+  observed_zero = as.matrix(result_zero$data(rows = task$row_ids, cols = prob_cols))
+  expect_equal(unname(observed_zero), unname(manual_zero))
+  expect_equal(observed_zero[, prob_cols[2]], rep(1, task$nrow))  # the fold predicting P(a) = 0 forces class "b" everywhere
+  expect_equal(observed_zero[, prob_cols[1]], rep(0, task$nrow))
+})
+
+test_that("PipeOpLearnerCV - cv ensemble averages regression predictions", {  # regression responses are the arithmetic mean over fold models
+  skip_if_not_installed("rpart")
+  task = TaskRegr$new("mtcars", backend = data.table::as.data.table(mtcars), target = "mpg")
+  learner = lrn("regr.rpart")
+  po = PipeOpLearnerCV$new(learner,
+    param_vals = list(resampling.folds = 2, resampling.predict_method = "cv_ensemble")
+  )
+  po$train(list(task))
+  result_task = po$predict(list(task))[[1]]
+  feature_name = sprintf("%s.response", po$id)
+  expect_true(feature_name %in% result_task$feature_names)
+
+  manual_responses = mlr3misc::map(po$state$cv_model_states, function(state) {
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    pred = clone$predict(task)
+    pred$response
+  })
+  manual_average = Reduce(`+`, manual_responses) / length(manual_responses)
+  expect_equal(result_task$data(rows = task$row_ids, cols = feature_name)[[1]], manual_average)
+
+  graph_pred = po$learner_model$predict(task)
+  expect_equal(graph_pred$response, manual_average)
+  expect_true(is.null(graph_pred$se) || all(is.na(graph_pred$se)))  # learner has no SE predict_type, so no SE should be produced
+})
+
+test_that("PipeOpLearnerCV - cv ensemble handles multiplicity", {  # state and outputs must stay Multiplicity-wrapped, one entry per inner task
+  skip_if_not_installed("rpart")
+  tasks = Multiplicity(tsk("iris"), tsk("sonar"))
+  learner = lrn("classif.rpart", predict_type = "prob")
+  po = po("learner_cv", learner,
+    param_vals = list(resampling.predict_method = "cv_ensemble")
+  )
+
+  train_out = po$train(list(tasks))[[1]]
+  expect_class(train_out, "Multiplicity")
+  expect_equal(length(train_out), 2L)
+  expect_true(all(mlr3misc::map_lgl(train_out, inherits, what = "Task")))
+
+  expect_class(po$state, "Multiplicity")
+  expect_true(all(mlr3misc::map_lgl(po$state, function(st) st$predict_method == "cv_ensemble")))
+  expect_true(all(mlr3misc::map_lgl(po$state, function(st) length(st$cv_model_states) == po$param_set$values$resampling.folds)))
+
+  predict_out = po$predict(list(tasks))[[1]]
+  expect_class(predict_out, "Multiplicity")
+  expect_equal(length(predict_out), 2L)
+  expect_true(all(mlr3misc::map_lgl(predict_out, inherits, what = "Task")))
+
+  orig_tasks = as.list(tasks)
+  pred_tasks = as.list(predict_out)
+  expect_true(all(unlist(Map(function(pred_task, orig_task) {  # each inner prediction task only carries its own task's prob columns
+    all(pred_task$feature_names %in% paste0(po$id, ".prob.", orig_task$class_names))
+  }, pred_tasks, orig_tasks))))
+})
+
+test_that("PipeOpLearnerCV - cv ensemble requires resampling method cv", {  # cv_ensemble is only defined for resampling.method = "cv"
+  skip_if_not_installed("rpart")
+  po = PipeOpLearnerCV$new(
+    lrn("classif.rpart"),
+    param_vals = list(resampling.method = "insample", resampling.predict_method = "cv_ensemble")
+  )
+  expect_error(po$train(list(tsk("iris"))), "cv_ensemble")  # error message must mention the offending option
+})
+
+test_that("PipeOpLearnerCV - learner_model returns averaged ensemble", {  # exported learner_model must behave identically to the PipeOp itself
+  skip_if_not_installed("rpart")
+  task = tsk("iris")
+  learner = lrn("classif.rpart", predict_type = "prob")
+  po = PipeOpLearnerCV$new(learner,
+    param_vals = list(resampling.predict_method = "cv_ensemble", resampling.keep_response = TRUE)
+  )
+  po$train(list(task))
+
+  learner_model = po$learner_model
+  expect_class(learner_model, "GraphLearner")
+
+  task_prediction = po$predict(list(task))[[1]]
+  dt_po = task_prediction$data(rows = task$row_ids, cols = task_prediction$feature_names)
+
+  graph_prediction = learner_model$predict(task)
+  expect_class(graph_prediction, "PredictionClassif")
+  dt_graph = as.data.table(graph_prediction)
+  data.table::setorder(dt_graph, row_ids)
+
+  prob_cols = paste0(po$id, ".prob.", task$class_names)
+  graph_prob_cols = paste0("prob.", task$class_names)  # Prediction columns lack the PipeOp id prefix
+  graph_matrix = as.matrix(dt_graph[, graph_prob_cols, with = FALSE])
+  colnames(graph_matrix) = prob_cols
+  expect_equal(as.matrix(dt_po[, prob_cols, with = FALSE]), graph_matrix)
+
+  expect_equal(
+    as.character(dt_po[[sprintf("%s.response", po$id)]]),
+    as.character(dt_graph$response)
+  )
+})
+
+test_that("PipeOpLearnerCV - cv ensemble with predict_type = 'se'", {  # SE columns must follow aggregate_se_weighted with the "predictive" default
+  skip_if_not_installed("mlr3learners")
+  task = tsk("mtcars")
+  learner = lrn("regr.lm", predict_type = "se")
+  po = PipeOpLearnerCV$new(learner,
+    param_vals = list(resampling.predict_method = "cv_ensemble")
+  )
+  po$train(list(task))
+  result_task = po$predict(list(task))[[1]]
+
+  response_col = sprintf("%s.response", po$id)
+  se_col = sprintf("%s.se", po$id)
+  expect_true(all(c(response_col, se_col) %in% result_task$feature_names))
+
+  manual_preds = mlr3misc::map(po$state$cv_model_states, function(state) {
+    clone = learner$clone(deep = TRUE)
+    clone$state = state
+    clone$predict(task)
+  })
+
+  manual_dt = mlr3misc::map(manual_preds, function(pred) {
+    dt = as.data.table(pred)
+    data.table::setorderv(dt, "row_ids")
+    list(response = dt$response, se = dt$se)
+  })
+  manual_response = Reduce(`+`, mlr3misc::map(manual_dt, "response")) / length(manual_dt)
+  expect_equal(result_task$data(rows = task$row_ids, cols = response_col)[[1]], manual_response)
+
+  weights = rep(1 / length(manual_dt), length(manual_dt))
+  manual_se = mlr3pipelines:::aggregate_se_weighted(  # defaults: method "predictive", rho 0 (rho unused by "predictive")
+    mlr3misc::map(manual_dt, "response"),
+    mlr3misc::map(manual_dt, "se"),
+    weights = weights,
+    method = "predictive",
+    rho = 0
+  )
+  expect_equal(result_task$data(rows = task$row_ids, cols = se_col)[[1]], manual_se)
})
test_that("PipeOpLearnerCV - within resampling", {
@@ -142,6 +540,19 @@ test_that("marshal", {
test_that("marshal multiplicity", {
skip_if_not_installed("rpart")
skip_if_not_installed("bbotk")
+  if (!nzchar(system.file(package = "mlr3pipelines"))) {  # fast base-R installedness check; rownames(installed.packages()) scans every installed package (see ?installed.packages)
+    expect_man_exists <<- function(man) {  # fallback helper when the package (and its real helper) is not installed
+      checkmate::expect_string(man, na.ok = TRUE, fixed = "::")  # man references look like "pkg::topic"
+      if (!is.na(man)) {
+        parts = strsplit(man, "::", fixed = TRUE)[[1L]]
+        if (!nzchar(system.file(package = parts[1L]))) {
+          return(invisible(NULL))  # referenced package absent: nothing to look up, treat as pass
+        }
+        matches = help.search(parts[2L], package = parts[1L], ignore.case = FALSE)
+        checkmate::expect_data_frame(matches$matches, min.rows = 1L, info = "man page lookup")
+      }
+    }
+  }
po = po("learner_cv", learner = lrn("classif.debug"))
po$train(list(Multiplicity(tsk("iris"), tsk("sonar"))))
s = po$state
@@ -193,6 +604,25 @@ test_that("marshal multiplicity", {
})
+test_that("marshal with cv ensemble", {  # cv_model_states must survive a marshal/unmarshal round trip
+  skip_if_not_installed("rpart")
+  task = tsk("iris")
+  po = po("learner_cv", learner = lrn("classif.rpart", predict_type = "prob"),
+    param_vals = list(resampling.predict_method = "cv_ensemble"))
+  po$train(list(task))
+  expect_equal(po$state$predict_method, "cv_ensemble")
+  marshaled = marshal_model(po$state)
+  expect_true(is_marshaled_model(marshaled) || inherits(marshaled, "pipeop_learner_cv_state"))  # rpart models may not need marshaling; both outcomes valid
+  unmarshaled = unmarshal_model(marshaled)
+  expect_equal(names(unmarshaled), names(po$state))
+  expect_equal(length(unmarshaled$cv_model_states), length(po$state$cv_model_states))
+  po$state = unmarshaled
+  expect_equal(  # restored state must predict deterministically
+    po$predict(list(task)),
+    po$predict(list(task))
+  )
+})
+
test_that("state class and multiplicity", {
po = po("learner_cv", learner = lrn("classif.debug"))
po$train(list(Multiplicity(tsk("iris"))))
@@ -206,3 +636,87 @@ test_that("state class and multiplicity", {
expect_class(po1$state[[1L]], "Multiplicity")
expect_class(po1$state[[1L]][[1L]], "pipeop_learner_cv_state")
})
+
+test_that("PipeOpLearnerCV cv ensemble aggregates SE like PipeOpRegrAvg", {  # every se_aggr method must delegate to aggregate_se_weighted
+  task_backend = data.table::data.table(
+    x1 = c(1, 2, 3, 4),
+    x2 = c(4, 3, 2, 1),
+    y = c(2, 4, 5, 7)
+  )
+  task = TaskRegr$new("debug_se_task", backend = task_backend, target = "y")
+  configs = list(  # covers all methods; rho only applies to "mean" (including a negative value)
+    list(se_aggr = "none", rho = NULL),
+    list(se_aggr = "between", rho = NULL),
+    list(se_aggr = "within", rho = NULL),
+    list(se_aggr = "predictive", rho = NULL),
+    list(se_aggr = "mean", rho = 0),
+    list(se_aggr = "mean", rho = 1),
+    list(se_aggr = "mean", rho = -0.5)
+  )
+
+  for (cfg in configs) {
+    learner = LearnerRegrDebug$new()
+    learner$predict_type = "se"
+    param_vals = list(
+      resampling.method = "cv",
+      resampling.folds = 2,
+      resampling.predict_method = "cv_ensemble",
+      resampling.se_aggr = cfg$se_aggr
+    )
+    if (!is.null(cfg$rho)) {
+      param_vals$resampling.se_aggr_rho = cfg$rho
+    }
+    po = PipeOpLearnerCV$new(learner, param_vals = param_vals)
+
+    po$train(list(task))
+    result_task = po$predict(list(task))[[1]]
+    col_response = sprintf("%s.response", po$id)
+    col_se = sprintf("%s.se", po$id)
+
+    expect_true(col_response %in% result_task$feature_names)
+
+    base_preds = mlr3misc::map(po$state$cv_model_states, function(st) {  # reconstruct per-fold predictions from the stored states
+      base = LearnerRegrDebug$new()
+      base$predict_type = "se"
+      base$state = st
+      pred = base$predict(task)
+      pred_dt = as.data.table(pred)
+      data.table::setorder(pred_dt, row_ids)
+      list(response = pred_dt$response, se = pred_dt$se)
+    })
+
+    k = length(base_preds)
+    weights = rep(1 / k, k)  # cv_ensemble weights folds equally
+    response_list = mlr3misc::map(base_preds, "response")
+    expected_response = Reduce(`+`, response_list) / k
+    se_list = mlr3misc::map(base_preds, "se")
+    expected_se = mlr3pipelines:::aggregate_se_weighted(
+      response_list,
+      se_list,
+      weights = weights,
+      method = cfg$se_aggr,
+      rho = cfg$rho %??% 0
+    )
+
+    observed_response = result_task$data(rows = task$row_ids, cols = col_response)[[1]]
+    expect_equal(observed_response, expected_response)
+
+    if (is.null(expected_se)) {  # method "none" yields no SE, hence no se feature column
+      expect_false(col_se %in% result_task$feature_names)
+    } else {
+      expect_true(col_se %in% result_task$feature_names)
+      observed_se = result_task$data(rows = task$row_ids, cols = col_se)[[1]]
+      expect_equal(observed_se, expected_se)
+    }
+
+    learner_model = po$learner_model
+    expect_class(learner_model, "GraphLearner")
+    graph_pred = learner_model$predict(task)  # GraphLearner view must agree with the PipeOp output
+    expect_equal(graph_pred$response, expected_response)
+    if (is.null(expected_se)) {
+      expect_true(is.null(graph_pred$se) || all(is.na(graph_pred$se)))
+    } else {
+      expect_equal(graph_pred$se, expected_se)
+    }
+  }
+})
diff --git a/tests/testthat/test_pipeop_regravg.R b/tests/testthat/test_pipeop_regravg.R
new file mode 100644
index 000000000..e17e47276
--- /dev/null
+++ b/tests/testthat/test_pipeop_regravg.R
@@ -0,0 +1,162 @@
+context("PipeOpRegrAvg")
+
+predict_regravg = function(predictions, se_aggr, weights, se_aggr_rho = NULL) {  # helper: configure a regravg PipeOp and return its aggregated Prediction
+  po = po("regravg")
+  po$param_set$values$weights = weights
+  po$param_set$values$se_aggr = se_aggr
+  if (!is.null(se_aggr_rho)) {  # rho is only set when the caller provides it (relevant for se_aggr = "mean")
+    po$param_set$values$se_aggr_rho = se_aggr_rho
+  }
+  train_nulls = replicate(length(predictions), NULL, simplify = FALSE)  # regravg trains on NULLs, one slot per incoming prediction
+  po$train(train_nulls)
+  po$predict(predictions)[[1]]
+}
+
+test_that("PipeOpRegrAvg se aggregation methods return expected SE", {  # checks each SE aggregation mode against hand-computed closed forms
+  row_ids = 1:2
+  truth = c(0.5, -1.2)
+  responses = list(
+    c(1, 4),
+    c(2, 5),
+    c(6, 7)
+  )
+  ses = list(
+    c(0.2, 0.3),
+    c(0.4, 0.5),
+    c(0.6, 0.7)
+  )
+  weights = c(0.2, 0.3, 0.5)
+
+  make_predictions = function(responses, ses_list = NULL) {  # builds PredictionRegr objects, optionally carrying SE
+    lapply(seq_along(responses), function(i) {
+      args = list(
+        row_ids = row_ids,
+        truth = truth,
+        response = responses[[i]]
+      )
+      if (!is.null(ses_list)) {
+        args$se = ses_list[[i]]
+      }
+      do.call(PredictionRegr$new, args)
+    })
+  }
+
+  preds_with_se = make_predictions(responses, ses)
+  preds_without_se = make_predictions(responses, NULL)
+
+  response_matrix = do.call(cbind, responses)
+  expected_response = as.numeric(response_matrix %*% weights)  # weighted mean of member responses
+
+  weight_matrix = matrix(weights, nrow = length(row_ids), ncol = length(weights), byrow = TRUE)
+  between = rowSums((response_matrix^2) * weight_matrix) - expected_response^2  # between-model variance: E_w[x^2] - (E_w[x])^2
+  between = pmax(between, 0)  # guard against tiny negative values from floating point
+  expected_between = sqrt(between)
+
+  se_matrix = do.call(cbind, ses)
+  within = rowSums((se_matrix^2) * weight_matrix)  # weighted mean of per-model variances
+  within = pmax(within, 0)
+  expected_within = sqrt(within)
+  expected_predictive = sqrt(within + between)  # "predictive" = within-model variance plus between-model disagreement
+
+  weight_matrix_sq = matrix(weights^2, nrow = length(row_ids), ncol = length(weights), byrow = TRUE)
+  Sw = rowSums(se_matrix * weight_matrix)
+  S2w2 = rowSums((se_matrix^2) * weight_matrix_sq)
+  expected_mean_rho0 = sqrt(pmax(S2w2, 0))  # "mean" method: Var = (1 - rho) * sum(w^2 s^2) + rho * (sum(w s))^2
+  expected_mean_rho1 = sqrt(pmax(Sw^2, 0))
+  rho_min = -1 / (length(weights) - 1)  # smallest rho yielding a valid equicorrelated covariance among k members
+  var_rho_min = (1 - rho_min) * S2w2 + rho_min * (Sw^2)
+  expected_mean_rho_min = sqrt(pmax(var_rho_min, 0))
+
+  pred_none = predict_regravg(preds_with_se, "none", weights)
+  expect_equal(pred_none$response, expected_response)
+  expect_true(all(is.na(pred_none$se)))
+  expect_false("se" %in% names(pred_none$data))  # "none" must not store an se slot at all
+
+  pred_between = predict_regravg(preds_without_se, "between", weights)  # "between" needs no member SEs
+  expect_equal(pred_between$response, expected_response)
+  expect_equal(pred_between$se, expected_between)
+
+  pred_within = predict_regravg(preds_with_se, "within", weights)
+  expect_equal(pred_within$response, expected_response)
+  expect_equal(pred_within$se, expected_within)
+
+  pred_predictive = predict_regravg(preds_with_se, "predictive", weights)
+  expect_equal(pred_predictive$response, expected_response)
+  expect_equal(pred_predictive$se, expected_predictive)
+
+  pred_mean_indep = predict_regravg(preds_with_se, "mean", weights, se_aggr_rho = 0)
+  expect_equal(pred_mean_indep$response, expected_response)
+  expect_equal(pred_mean_indep$se, expected_mean_rho0)
+
+  pred_mean_full = predict_regravg(preds_with_se, "mean", weights, se_aggr_rho = 1)
+  expect_equal(pred_mean_full$response, expected_response)
+  expect_equal(pred_mean_full$se, expected_mean_rho1)
+
+  pred_mean_clamped = predict_regravg(preds_with_se, "mean", weights, se_aggr_rho = -1)  # rho = -1 is out of range and expected to clamp to rho_min
+  expect_equal(pred_mean_clamped$response, expected_response)
+  expect_equal(pred_mean_clamped$se, expected_mean_rho_min)
+})
+
+test_that("PipeOpRegrAvg se aggregation requiring SE errors when SE is missing", {  # SE-consuming methods must fail loudly without member SEs
+  responses = list(
+    c(1, 2),
+    c(3, 4)
+  )
+  weights = c(0.5, 0.5)
+  preds_without_se = lapply(responses, function(resp) {
+    PredictionRegr$new(row_ids = 1:2, truth = c(0, 0), response = resp)
+  })
+
+  expect_error(
+    predict_regravg(preds_without_se, "predictive", weights),
+    "requires `ses_list`"
+  )
+
+  expect_error(
+    predict_regravg(preds_without_se, "mean", weights),
+    "requires `ses_list`"
+  )
+
+  expect_error(
+    predict_regravg(preds_without_se, "within", weights),
+    "requires `ses_list`"
+  )
+})
+
+test_that("PipeOpRegrAvg se aggregation with single prediction behaves correctly", {
+ row_ids = 1:4
+ truth = c(0, 1, 2, 3)
+ response = c(1.1, 2.2, 3.3, 4.4)
+ se = c(0.5, 0.6, 0.7, 0.8)
+ single_pred_with_se = list(PredictionRegr$new(
+ row_ids = row_ids,
+ truth = truth,
+ response = response,
+ se = se
+ ))
+ single_pred_without_se = list(PredictionRegr$new(
+ row_ids = row_ids,
+ truth = truth,
+ response = response
+ ))
+
+ result_none = predict_regravg(single_pred_with_se, "none", weights = 1)
+ expect_equal(result_none$response, response)
+ expect_false("se" %in% names(result_none$data))
+
+ result_between = predict_regravg(single_pred_without_se, "between", weights = 1)
+ expect_equal(result_between$response, response)
+ expect_equal(result_between$se, rep(0, length(response)))
+
+ result_within = predict_regravg(single_pred_with_se, "within", weights = 1)
+ expect_equal(result_within$response, response)
+ expect_equal(result_within$se, se)
+
+ result_predictive = predict_regravg(single_pred_with_se, "predictive", weights = 1)
+ expect_equal(result_predictive$response, response)
+ expect_equal(result_predictive$se, se)
+
+ result_mean = predict_regravg(single_pred_with_se, "mean", weights = 1, se_aggr_rho = 0.25)
+ expect_equal(result_mean$response, response)
+ expect_equal(result_mean$se, se)
+})