From b1b836b0ac2e5d0c39c105bd39f38c253cf1f2eb Mon Sep 17 00:00:00 2001
From: Maximilian Muecke <muecke.maximilian@gmail.com>
Date: Sun, 28 Jul 2024 11:42:02 +0200
Subject: [PATCH 01/36] feat: add init umap implementation

---
 DESCRIPTION    |   3 +-
 R/PipeOpUMAP.R | 155 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 157 insertions(+), 1 deletion(-)
 create mode 100644 R/PipeOpUMAP.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 29f8348b2..afe29cf56 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -87,7 +87,8 @@ Suggests:
     methods,
     vtreat,
     future,
-    htmlwidgets
+    htmlwidgets,
+    uwot (>= 0.2.1)
 ByteCompile: true
 Encoding: UTF-8
 Config/testthat/edition: 3
diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
new file mode 100644
index 000000000..bb793dac1
--- /dev/null
+++ b/R/PipeOpUMAP.R
@@ -0,0 +1,155 @@
+#' @title Uniform Manifold Approximation and Projection (UMAP)
+#'
+#' @usage NULL
+#' @name mlr_pipeops_umap
+#' @format [`R6Class`] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`].
+#'
+#' @description
+#' Carry out dimensionality reduction of a dataset using the Uniform Manifold Approximation and Projection (UMAP).
+#' See [uwot::umap2()] for details.
+#'
+#' @section Construction:
+#' ```
+#' PipeOpUMAP$new(id = "umap", param_vals = list())
+#' ```
+#'
+#' * `id` :: `character(1)`\cr
+#'   Identifier of resulting object, default `"umap"`.
+#' * `param_vals` :: named `list`\cr
+#'   List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`.
+#'
+#' @section Input and Output Channels:
+#' Input and output channels are inherited from [`PipeOpTaskPreproc`].
+#'
+#' The output is the input [`Task`][mlr3::Task] with all affected numeric features replaced by their UMAP embedding coordinates.
+#'
+#' @section State:
+#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`], as well as the elements of the model
+#' returned by [uwot::umap2()] when called with `ret_model = TRUE`. These are in particular:
+#' * `embedding` :: `matrix`\cr
+#'   The embedded coordinates of the training data.
+#'   It is kept in the state because [uwot::umap_transform()] needs it to embed new data.
+#' * further elements\cr
+#'   The fitted model information described in the "Value" section of [uwot::umap2()].
+#'
+#' During prediction, the complete `$state` is passed as the `model` argument of
+#' [uwot::umap_transform()].
+#'
+#' @section Parameters:
+#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as the arguments of [uwot::umap2()], for example:
+#' * `n_neighbors` :: `integer(1)`\cr
+#'   Size of the local neighborhood, between 2 and 100. Default is `15`. See [uwot::umap2()].
+#' * `n_components` :: `integer(1)`\cr
+#'   Dimension of the embedding, between 1 and 100. Default is `2`. See [uwot::umap2()].
+#' * `verbose` :: `logical(1)`\cr
+#'   Whether to print progress messages. Initialized to `FALSE`. See [uwot::umap2()].
+#'
+#' @section Internals:
+#' Uses the [`umap2()`][uwot::umap2] function for training and the [`umap_transform()`][uwot::umap_transform] function for prediction.
+#'
+#' @section Methods:
+#' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`].
+#'
+#' @examples
+#' library("mlr3")
+#'
+#' task = tsk("iris")
+#' pop = po("umap")
+#'
+#' task$data()
+#' pop$train(list(task))[[1]]$data()
+#'
+#' pop$state
+#' @family PipeOps
+#' @template seealso_pipeopslist
+#' @include PipeOpTaskPreproc.R
+#' @export
+PipeOpUMAP = R6Class("PipeOpUMAP",
+  inherit = PipeOpTaskPreproc,
+  public = list(
+    initialize = function(id = "umap", param_vals = list()) {
+      ps = ps(
+        n_neighbors = p_int(2L, 100L, default = 15L, tags = c("train", "umap")),
+        n_components = p_int(1L, 100L, default = 2L, tags = c("train", "umap")),
+        metric = p_fct(
+          c("euclidean", "cosine", "manhattan", "hamming", "correlation", "categorical"),
+          default = "euclidean",
+          tags = c("train", "umap"),
+          depends = quote(nn_method == "hnsw") # NOTE(review): blocks `metric` unless nn_method is "hnsw" - confirm this is intended
+        ),
+        n_epochs = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
+        learning_rate = p_dbl(0, default = 1, tags = c("train", "umap")),
+        scale = p_lgl(default = FALSE, special_vals = list("none", "Z", "maxabs", "range", "colrange", NULL), tags = c("train", "umap")),
+        init = p_uty(
+          default = "spectral",
+          tags = c("train", "umap"),
+          custom_check = crate(function(x) {
+            choices = c("spectral", "normlaplacian", "random", "lvrandom", "laplacian", "pca", "spca", "agspectral")
+            check_choice(x, choices) %check||% check_matrix(x)
+          })
+        ),
+        init_sdev = p_uty(default = "range", tags = c("train", "umap")),
+        spread = p_dbl(default = 1, tags = c("train", "umap")),
+        min_dist = p_dbl(default = 0.01, tags = c("train", "umap")),
+        set_op_mix_ratio = p_dbl(0, 1, default = 1, tags = c("train", "umap")),
+        local_connectivity = p_dbl(1, default = 1, tags = c("train", "umap")),
+        bandwidth = p_dbl(default = 1, tags = c("train", "umap")),
+        repulsion_strength = p_dbl(default = 1, tags = c("train", "umap")),
+        negative_sample_rate = p_dbl(default = 5, tags = c("train", "umap")),
+        a = p_uty(default = NULL, tags = c("train", "umap")),
+        b = p_uty(default = NULL, tags = c("train", "umap")),
+        nn_method = p_uty(
+          default = NULL,
+          tags = c("train", "umap"),
+          custom_check = crate(function(x) {
+            check_choice(x, c("fnn", "annoy", "hnsw", "nndescent"), null.ok = TRUE) %check||%
+              (check_list(x, types = "matrix", len = 2L) %check&&% check_names(names(x), permutation.of = c("idx", "dist"))) %check||%
+              check_class(x, "dgCMatrix")
+          })
+        ),
+        n_trees = p_int(10L, 100L, default = 50L, tags = c("train", "umap")),
+        search_k = p_int(tags = c("train", "umap")),
+        approx_pow = p_lgl(default = FALSE, tags = c("train", "umap")),
+        y = p_uty(default = NULL, tags = c("train", "umap")),
+        target_n_neighbors = p_int(tags = c("train", "umap")),
+        target_metric = p_fct(c("euclidean", "cosine", "correlation"), default = "euclidean", tags = c("train", "umap")),
+        target_weight = p_dbl(0, 1, default = 0.5, tags = c("train", "umap")),
+        pca = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
+        pca_center = p_lgl(default = TRUE, tags = c("train", "umap")),
+        pca_rand = p_lgl(default = TRUE, tags = c("train", "umap")),
+        fast_sgd = p_lgl(default = FALSE, tags = c("train", "umap")),
+        n_threads = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
+        n_sgd_threads = p_int(0L, default = 0L, special_vals = list("auto"), tags = c("train", "umap")),
+        grain_size = p_int(1L, default = 1L, tags = c("train", "umap")),
+        verbose = p_lgl(default = TRUE, tags = c("train", "umap")),
+        batch = p_lgl(default = FALSE, tags = c("train", "umap")),
+        opt_args = p_uty(default = NULL, tags = c("train", "umap"), custom_check = crate(function(x) check_list(x, null.ok = TRUE))),
+        epoch_callback = p_uty(default = NULL, tags = c("train", "umap"), custom_check = check_function_or_null),
+        pca_method = p_fct(c("irlba", "rsvd", "bigstatsr", "svd", "auto"), default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
+        binary_edge_weights = p_lgl(default = FALSE, tags = c("train", "umap")),
+        dens_scale = p_dbl(0, 1, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
+        seed = p_int(default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
+        nn_args = p_uty(default = NULL, tags = c("train", "umap"), custom_check = crate(function(x) check_list(x, null.ok = TRUE)))
+      )
+      ps$set_values(verbose = FALSE) # silence uwot's progress output unless the user opts in
+
+      super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer"))
+    }
+  ),
+  private = list(
+
+    .train_dt = function(dt, levels, target) {
+      params = insert_named(self$param_set$get_values(tags = "umap"), list(ret_model = TRUE)) # ret_model: return a model usable for prediction
+      umap = invoke(uwot::umap2, dt, .args = params)
+      self$state = umap
+      # Keep `embedding` in the state: uwot::umap_transform() reads model$embedding to place new observations.
+      umap$embedding
+    },
+
+    .predict_dt = function(dt, levels) {
+      invoke(uwot::umap_transform, dt, self$state)
+    }
+  )
+)
+
+mlr_pipeops$add("umap", PipeOpUMAP)

From ffdf668bd65e6c1cb5847197f869e891f3ed3e85 Mon Sep 17 00:00:00 2001
From: Maximilian Muecke <muecke.maximilian@gmail.com>
Date: Sun, 28 Jul 2024 11:44:24 +0200
Subject: [PATCH 02/36] docs: run document

---
 DESCRIPTION                              |   1 +
 NAMESPACE                                |   1 +
 man/PipeOp.Rd                            |   1 +
 man/PipeOpEnsemble.Rd                    |   1 +
 man/PipeOpImpute.Rd                      |   1 +
 man/PipeOpTargetTrafo.Rd                 |   1 +
 man/PipeOpTaskPreproc.Rd                 |   1 +
 man/PipeOpTaskPreprocSimple.Rd           |   1 +
 man/mlr_pipeops.Rd                       |   1 +
 man/mlr_pipeops_boxcox.Rd                |   1 +
 man/mlr_pipeops_branch.Rd                |   1 +
 man/mlr_pipeops_chunk.Rd                 |   1 +
 man/mlr_pipeops_classbalancing.Rd        |   1 +
 man/mlr_pipeops_classifavg.Rd            |   1 +
 man/mlr_pipeops_classweights.Rd          |   1 +
 man/mlr_pipeops_colapply.Rd              |   1 +
 man/mlr_pipeops_collapsefactors.Rd       |   1 +
 man/mlr_pipeops_colroles.Rd              |   1 +
 man/mlr_pipeops_copy.Rd                  |   1 +
 man/mlr_pipeops_datefeatures.Rd          |   1 +
 man/mlr_pipeops_encode.Rd                |   1 +
 man/mlr_pipeops_encodeimpact.Rd          |   1 +
 man/mlr_pipeops_encodelmer.Rd            |   1 +
 man/mlr_pipeops_featureunion.Rd          |   1 +
 man/mlr_pipeops_filter.Rd                |   1 +
 man/mlr_pipeops_fixfactors.Rd            |   1 +
 man/mlr_pipeops_histbin.Rd               |   1 +
 man/mlr_pipeops_ica.Rd                   |   1 +
 man/mlr_pipeops_imputeconstant.Rd        |   1 +
 man/mlr_pipeops_imputehist.Rd            |   1 +
 man/mlr_pipeops_imputelearner.Rd         |   1 +
 man/mlr_pipeops_imputemean.Rd            |   1 +
 man/mlr_pipeops_imputemedian.Rd          |   1 +
 man/mlr_pipeops_imputemode.Rd            |   1 +
 man/mlr_pipeops_imputeoor.Rd             |   1 +
 man/mlr_pipeops_imputesample.Rd          |   1 +
 man/mlr_pipeops_kernelpca.Rd             |   1 +
 man/mlr_pipeops_learner.Rd               |   1 +
 man/mlr_pipeops_missind.Rd               |   1 +
 man/mlr_pipeops_modelmatrix.Rd           |   1 +
 man/mlr_pipeops_multiplicityexply.Rd     |   1 +
 man/mlr_pipeops_multiplicityimply.Rd     |   1 +
 man/mlr_pipeops_mutate.Rd                |   1 +
 man/mlr_pipeops_nmf.Rd                   |   1 +
 man/mlr_pipeops_nop.Rd                   |   1 +
 man/mlr_pipeops_ovrsplit.Rd              |   1 +
 man/mlr_pipeops_ovrunite.Rd              |   1 +
 man/mlr_pipeops_pca.Rd                   |   1 +
 man/mlr_pipeops_proxy.Rd                 |   1 +
 man/mlr_pipeops_quantilebin.Rd           |   1 +
 man/mlr_pipeops_randomprojection.Rd      |   1 +
 man/mlr_pipeops_randomresponse.Rd        |   1 +
 man/mlr_pipeops_regravg.Rd               |   1 +
 man/mlr_pipeops_removeconstants.Rd       |   1 +
 man/mlr_pipeops_renamecolumns.Rd         |   1 +
 man/mlr_pipeops_replicate.Rd             |   1 +
 man/mlr_pipeops_scale.Rd                 |   1 +
 man/mlr_pipeops_scalemaxabs.Rd           |   1 +
 man/mlr_pipeops_scalerange.Rd            |   1 +
 man/mlr_pipeops_select.Rd                |   1 +
 man/mlr_pipeops_smote.Rd                 |   1 +
 man/mlr_pipeops_spatialsign.Rd           |   1 +
 man/mlr_pipeops_subsample.Rd             |   1 +
 man/mlr_pipeops_targetinvert.Rd          |   1 +
 man/mlr_pipeops_targetmutate.Rd          |   1 +
 man/mlr_pipeops_targettrafoscalerange.Rd |   1 +
 man/mlr_pipeops_textvectorizer.Rd        |   1 +
 man/mlr_pipeops_threshold.Rd             |   1 +
 man/mlr_pipeops_tunethreshold.Rd         |   1 +
 man/mlr_pipeops_umap.Rd                  | 160 +++++++++++++++++++++++
 man/mlr_pipeops_unbranch.Rd              |   1 +
 man/mlr_pipeops_updatetarget.Rd          |   1 +
 man/mlr_pipeops_vtreat.Rd                |   1 +
 man/mlr_pipeops_yeojohnson.Rd            |   1 +
 74 files changed, 233 insertions(+)
 create mode 100644 man/mlr_pipeops_umap.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index afe29cf56..5a74e9138 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -165,6 +165,7 @@ Collate:
     'PipeOpThreshold.R'
     'PipeOpTrafo.R'
     'PipeOpTuneThreshold.R'
+    'PipeOpUMAP.R'
     'PipeOpUnbranch.R'
     'PipeOpVtreat.R'
     'PipeOpYeoJohnson.R'
diff --git a/NAMESPACE b/NAMESPACE
index d69d21c09..5b6783ac1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -108,6 +108,7 @@ export(PipeOpTaskPreprocSimple)
 export(PipeOpTextVectorizer)
 export(PipeOpThreshold)
 export(PipeOpTuneThreshold)
+export(PipeOpUMAP)
 export(PipeOpUnbranch)
 export(PipeOpVtreat)
 export(PipeOpYeoJohnson)
diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd
index 82d829e42..bf0debbd3 100644
--- a/man/PipeOp.Rd
+++ b/man/PipeOp.Rd
@@ -333,6 +333,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/PipeOpEnsemble.Rd b/man/PipeOpEnsemble.Rd
index 61ac51bb9..ab016c9f9 100644
--- a/man/PipeOpEnsemble.Rd
+++ b/man/PipeOpEnsemble.Rd
@@ -165,6 +165,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/PipeOpImpute.Rd b/man/PipeOpImpute.Rd
index e52256e79..7fc9e598d 100644
--- a/man/PipeOpImpute.Rd
+++ b/man/PipeOpImpute.Rd
@@ -195,6 +195,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/PipeOpTargetTrafo.Rd b/man/PipeOpTargetTrafo.Rd
index 8d811ef60..6fc37dc5e 100644
--- a/man/PipeOpTargetTrafo.Rd
+++ b/man/PipeOpTargetTrafo.Rd
@@ -206,6 +206,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/PipeOpTaskPreproc.Rd b/man/PipeOpTaskPreproc.Rd
index 817173680..25941823d 100644
--- a/man/PipeOpTaskPreproc.Rd
+++ b/man/PipeOpTaskPreproc.Rd
@@ -261,6 +261,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/PipeOpTaskPreprocSimple.Rd b/man/PipeOpTaskPreprocSimple.Rd
index 69ec70f72..2e0928fb8 100644
--- a/man/PipeOpTaskPreprocSimple.Rd
+++ b/man/PipeOpTaskPreprocSimple.Rd
@@ -198,6 +198,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops.Rd b/man/mlr_pipeops.Rd
index bba536267..c531acb78 100644
--- a/man/mlr_pipeops.Rd
+++ b/man/mlr_pipeops.Rd
@@ -135,6 +135,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_boxcox.Rd b/man/mlr_pipeops_boxcox.Rd
index 0d514ce8c..da206d61d 100644
--- a/man/mlr_pipeops_boxcox.Rd
+++ b/man/mlr_pipeops_boxcox.Rd
@@ -149,6 +149,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_branch.Rd b/man/mlr_pipeops_branch.Rd
index e9b855e0a..1830f2c48 100644
--- a/man/mlr_pipeops_branch.Rd
+++ b/man/mlr_pipeops_branch.Rd
@@ -167,6 +167,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_chunk.Rd b/man/mlr_pipeops_chunk.Rd
index 7603b5b0f..e35161348 100644
--- a/man/mlr_pipeops_chunk.Rd
+++ b/man/mlr_pipeops_chunk.Rd
@@ -146,6 +146,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_classbalancing.Rd b/man/mlr_pipeops_classbalancing.Rd
index c734631a9..0b4cfccf6 100644
--- a/man/mlr_pipeops_classbalancing.Rd
+++ b/man/mlr_pipeops_classbalancing.Rd
@@ -187,6 +187,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_classifavg.Rd b/man/mlr_pipeops_classifavg.Rd
index 381046572..cfd2e68e7 100644
--- a/man/mlr_pipeops_classifavg.Rd
+++ b/man/mlr_pipeops_classifavg.Rd
@@ -163,6 +163,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_classweights.Rd b/man/mlr_pipeops_classweights.Rd
index ea3eef216..86b799874 100644
--- a/man/mlr_pipeops_classweights.Rd
+++ b/man/mlr_pipeops_classweights.Rd
@@ -155,6 +155,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_colapply.Rd b/man/mlr_pipeops_colapply.Rd
index e2e8bbe54..23ba274c5 100644
--- a/man/mlr_pipeops_colapply.Rd
+++ b/man/mlr_pipeops_colapply.Rd
@@ -176,6 +176,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_collapsefactors.Rd b/man/mlr_pipeops_collapsefactors.Rd
index 4dc6dc619..32e0c0f60 100644
--- a/man/mlr_pipeops_collapsefactors.Rd
+++ b/man/mlr_pipeops_collapsefactors.Rd
@@ -143,6 +143,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd
index 73a5ee723..5996de056 100644
--- a/man/mlr_pipeops_colroles.Rd
+++ b/man/mlr_pipeops_colroles.Rd
@@ -135,6 +135,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_copy.Rd b/man/mlr_pipeops_copy.Rd
index c09aff0cf..5b9f6a03e 100644
--- a/man/mlr_pipeops_copy.Rd
+++ b/man/mlr_pipeops_copy.Rd
@@ -165,6 +165,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_datefeatures.Rd b/man/mlr_pipeops_datefeatures.Rd
index eb881ec59..636a08f89 100644
--- a/man/mlr_pipeops_datefeatures.Rd
+++ b/man/mlr_pipeops_datefeatures.Rd
@@ -182,6 +182,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_encode.Rd b/man/mlr_pipeops_encode.Rd
index 79f2e3a8c..390194822 100644
--- a/man/mlr_pipeops_encode.Rd
+++ b/man/mlr_pipeops_encode.Rd
@@ -178,6 +178,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_encodeimpact.Rd b/man/mlr_pipeops_encodeimpact.Rd
index 8033735f0..6c3300407 100644
--- a/man/mlr_pipeops_encodeimpact.Rd
+++ b/man/mlr_pipeops_encodeimpact.Rd
@@ -160,6 +160,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_encodelmer.Rd b/man/mlr_pipeops_encodelmer.Rd
index bfd1285ec..30e2c255d 100644
--- a/man/mlr_pipeops_encodelmer.Rd
+++ b/man/mlr_pipeops_encodelmer.Rd
@@ -175,6 +175,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_featureunion.Rd b/man/mlr_pipeops_featureunion.Rd
index e0dbf21b6..4fe2ce4d8 100644
--- a/man/mlr_pipeops_featureunion.Rd
+++ b/man/mlr_pipeops_featureunion.Rd
@@ -180,6 +180,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_filter.Rd b/man/mlr_pipeops_filter.Rd
index 3d5d2fc53..6ba6170a4 100644
--- a/man/mlr_pipeops_filter.Rd
+++ b/man/mlr_pipeops_filter.Rd
@@ -211,6 +211,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_fixfactors.Rd b/man/mlr_pipeops_fixfactors.Rd
index 5fd00abc2..e273af8c0 100644
--- a/man/mlr_pipeops_fixfactors.Rd
+++ b/man/mlr_pipeops_fixfactors.Rd
@@ -135,6 +135,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_histbin.Rd b/man/mlr_pipeops_histbin.Rd
index 74d6c5e2f..7eacb0edd 100644
--- a/man/mlr_pipeops_histbin.Rd
+++ b/man/mlr_pipeops_histbin.Rd
@@ -147,6 +147,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_ica.Rd b/man/mlr_pipeops_ica.Rd
index 1e6473928..28707a9e0 100644
--- a/man/mlr_pipeops_ica.Rd
+++ b/man/mlr_pipeops_ica.Rd
@@ -175,6 +175,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_imputeconstant.Rd b/man/mlr_pipeops_imputeconstant.Rd
index 0c35c4a9a..58297123b 100644
--- a/man/mlr_pipeops_imputeconstant.Rd
+++ b/man/mlr_pipeops_imputeconstant.Rd
@@ -149,6 +149,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_imputehist.Rd b/man/mlr_pipeops_imputehist.Rd
index 0fb6d8f1f..6a13b3f32 100644
--- a/man/mlr_pipeops_imputehist.Rd
+++ b/man/mlr_pipeops_imputehist.Rd
@@ -140,6 +140,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_imputelearner.Rd b/man/mlr_pipeops_imputelearner.Rd
index a2f9ea073..710906408 100644
--- a/man/mlr_pipeops_imputelearner.Rd
+++ b/man/mlr_pipeops_imputelearner.Rd
@@ -186,6 +186,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_imputemean.Rd b/man/mlr_pipeops_imputemean.Rd
index bd8d788a5..e529cbe04 100644
--- a/man/mlr_pipeops_imputemean.Rd
+++ b/man/mlr_pipeops_imputemean.Rd
@@ -134,6 +134,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_imputemedian.Rd b/man/mlr_pipeops_imputemedian.Rd
index 00145e29d..baad75bb4 100644
--- a/man/mlr_pipeops_imputemedian.Rd
+++ b/man/mlr_pipeops_imputemedian.Rd
@@ -134,6 +134,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_imputemode.Rd b/man/mlr_pipeops_imputemode.Rd
index 613970b73..0e1b1d78a 100644
--- a/man/mlr_pipeops_imputemode.Rd
+++ b/man/mlr_pipeops_imputemode.Rd
@@ -141,6 +141,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_imputeoor.Rd b/man/mlr_pipeops_imputeoor.Rd
index c5766c7e6..259e0f3aa 100644
--- a/man/mlr_pipeops_imputeoor.Rd
+++ b/man/mlr_pipeops_imputeoor.Rd
@@ -163,6 +163,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_imputesample.Rd b/man/mlr_pipeops_imputesample.Rd
index 2944213ce..625febcea 100644
--- a/man/mlr_pipeops_imputesample.Rd
+++ b/man/mlr_pipeops_imputesample.Rd
@@ -136,6 +136,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_kernelpca.Rd b/man/mlr_pipeops_kernelpca.Rd
index e2d92e746..40ffc3bce 100644
--- a/man/mlr_pipeops_kernelpca.Rd
+++ b/man/mlr_pipeops_kernelpca.Rd
@@ -150,6 +150,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_learner.Rd b/man/mlr_pipeops_learner.Rd
index 023c5ca9c..1304d087f 100644
--- a/man/mlr_pipeops_learner.Rd
+++ b/man/mlr_pipeops_learner.Rd
@@ -181,6 +181,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_missind.Rd b/man/mlr_pipeops_missind.Rd
index d1ac309e3..8de70bc0c 100644
--- a/man/mlr_pipeops_missind.Rd
+++ b/man/mlr_pipeops_missind.Rd
@@ -164,6 +164,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_modelmatrix.Rd b/man/mlr_pipeops_modelmatrix.Rd
index d27063f12..6876b541d 100644
--- a/man/mlr_pipeops_modelmatrix.Rd
+++ b/man/mlr_pipeops_modelmatrix.Rd
@@ -140,6 +140,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_multiplicityexply.Rd b/man/mlr_pipeops_multiplicityexply.Rd
index 01531c672..365516422 100644
--- a/man/mlr_pipeops_multiplicityexply.Rd
+++ b/man/mlr_pipeops_multiplicityexply.Rd
@@ -146,6 +146,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_multiplicityimply.Rd b/man/mlr_pipeops_multiplicityimply.Rd
index b5a3400ab..4557895c6 100644
--- a/man/mlr_pipeops_multiplicityimply.Rd
+++ b/man/mlr_pipeops_multiplicityimply.Rd
@@ -152,6 +152,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_mutate.Rd b/man/mlr_pipeops_mutate.Rd
index cff63d4b1..4ba9f9920 100644
--- a/man/mlr_pipeops_mutate.Rd
+++ b/man/mlr_pipeops_mutate.Rd
@@ -157,6 +157,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd
index 148dfbcfd..1de3f5083 100644
--- a/man/mlr_pipeops_nmf.Rd
+++ b/man/mlr_pipeops_nmf.Rd
@@ -192,6 +192,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_nop.Rd b/man/mlr_pipeops_nop.Rd
index eabc9e48f..b195e5648 100644
--- a/man/mlr_pipeops_nop.Rd
+++ b/man/mlr_pipeops_nop.Rd
@@ -142,6 +142,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_ovrsplit.Rd b/man/mlr_pipeops_ovrsplit.Rd
index e0718678b..f1eb5a1b4 100644
--- a/man/mlr_pipeops_ovrsplit.Rd
+++ b/man/mlr_pipeops_ovrsplit.Rd
@@ -159,6 +159,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_ovrunite.Rd b/man/mlr_pipeops_ovrunite.Rd
index 83f3c85c2..4010905ed 100644
--- a/man/mlr_pipeops_ovrunite.Rd
+++ b/man/mlr_pipeops_ovrunite.Rd
@@ -154,6 +154,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_pca.Rd b/man/mlr_pipeops_pca.Rd
index ca5d14d59..262c0ce4c 100644
--- a/man/mlr_pipeops_pca.Rd
+++ b/man/mlr_pipeops_pca.Rd
@@ -151,6 +151,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_proxy.Rd b/man/mlr_pipeops_proxy.Rd
index 1e6e8f9c0..8f962f21a 100644
--- a/man/mlr_pipeops_proxy.Rd
+++ b/man/mlr_pipeops_proxy.Rd
@@ -165,6 +165,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_quantilebin.Rd b/man/mlr_pipeops_quantilebin.Rd
index 8b416ee52..624d5f708 100644
--- a/man/mlr_pipeops_quantilebin.Rd
+++ b/man/mlr_pipeops_quantilebin.Rd
@@ -139,6 +139,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_randomprojection.Rd b/man/mlr_pipeops_randomprojection.Rd
index e41d6ea42..16839ff3c 100644
--- a/man/mlr_pipeops_randomprojection.Rd
+++ b/man/mlr_pipeops_randomprojection.Rd
@@ -151,6 +151,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_randomresponse.Rd b/man/mlr_pipeops_randomresponse.Rd
index 2f813a326..c3364c453 100644
--- a/man/mlr_pipeops_randomresponse.Rd
+++ b/man/mlr_pipeops_randomresponse.Rd
@@ -168,6 +168,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_regravg.Rd b/man/mlr_pipeops_regravg.Rd
index 4b1603441..d56d1c5a5 100644
--- a/man/mlr_pipeops_regravg.Rd
+++ b/man/mlr_pipeops_regravg.Rd
@@ -154,6 +154,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_removeconstants.Rd b/man/mlr_pipeops_removeconstants.Rd
index 4fe961f7c..042998703 100644
--- a/man/mlr_pipeops_removeconstants.Rd
+++ b/man/mlr_pipeops_removeconstants.Rd
@@ -144,6 +144,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_renamecolumns.Rd b/man/mlr_pipeops_renamecolumns.Rd
index 768211b84..44e7c2209 100644
--- a/man/mlr_pipeops_renamecolumns.Rd
+++ b/man/mlr_pipeops_renamecolumns.Rd
@@ -143,6 +143,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_replicate.Rd b/man/mlr_pipeops_replicate.Rd
index 7735a2586..e07d3538b 100644
--- a/man/mlr_pipeops_replicate.Rd
+++ b/man/mlr_pipeops_replicate.Rd
@@ -136,6 +136,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_scale.Rd b/man/mlr_pipeops_scale.Rd
index 33d4e027e..5f397b2a2 100644
--- a/man/mlr_pipeops_scale.Rd
+++ b/man/mlr_pipeops_scale.Rd
@@ -158,6 +158,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_scalemaxabs.Rd b/man/mlr_pipeops_scalemaxabs.Rd
index 279a2c7c1..29946719b 100644
--- a/man/mlr_pipeops_scalemaxabs.Rd
+++ b/man/mlr_pipeops_scalemaxabs.Rd
@@ -133,6 +133,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_scalerange.Rd b/man/mlr_pipeops_scalerange.Rd
index 707ca661c..3a7ce39e3 100644
--- a/man/mlr_pipeops_scalerange.Rd
+++ b/man/mlr_pipeops_scalerange.Rd
@@ -138,6 +138,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_select.Rd b/man/mlr_pipeops_select.Rd
index c3d8ec0f9..d4e25fb1b 100644
--- a/man/mlr_pipeops_select.Rd
+++ b/man/mlr_pipeops_select.Rd
@@ -154,6 +154,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_smote.Rd b/man/mlr_pipeops_smote.Rd
index b92867d6e..7a526507c 100644
--- a/man/mlr_pipeops_smote.Rd
+++ b/man/mlr_pipeops_smote.Rd
@@ -157,6 +157,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_spatialsign.Rd b/man/mlr_pipeops_spatialsign.Rd
index 4995632ca..12d25a921 100644
--- a/man/mlr_pipeops_spatialsign.Rd
+++ b/man/mlr_pipeops_spatialsign.Rd
@@ -133,6 +133,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_subsample.Rd b/man/mlr_pipeops_subsample.Rd
index 5a0a3c9fc..720722479 100644
--- a/man/mlr_pipeops_subsample.Rd
+++ b/man/mlr_pipeops_subsample.Rd
@@ -148,6 +148,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_targetinvert.Rd b/man/mlr_pipeops_targetinvert.Rd
index a63ea2feb..c95d0e7f3 100644
--- a/man/mlr_pipeops_targetinvert.Rd
+++ b/man/mlr_pipeops_targetinvert.Rd
@@ -133,6 +133,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_targetmutate.Rd b/man/mlr_pipeops_targetmutate.Rd
index 5193c2db9..806c1854a 100644
--- a/man/mlr_pipeops_targetmutate.Rd
+++ b/man/mlr_pipeops_targetmutate.Rd
@@ -181,6 +181,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_targettrafoscalerange.Rd b/man/mlr_pipeops_targettrafoscalerange.Rd
index 8441b3f1e..a6e38c132 100644
--- a/man/mlr_pipeops_targettrafoscalerange.Rd
+++ b/man/mlr_pipeops_targettrafoscalerange.Rd
@@ -147,6 +147,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_textvectorizer.Rd b/man/mlr_pipeops_textvectorizer.Rd
index 6212e6ad6..775fd717c 100644
--- a/man/mlr_pipeops_textvectorizer.Rd
+++ b/man/mlr_pipeops_textvectorizer.Rd
@@ -247,6 +247,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_targettrafoscalerange}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_threshold.Rd b/man/mlr_pipeops_threshold.Rd
index 98c3039c0..3eeee4561 100644
--- a/man/mlr_pipeops_threshold.Rd
+++ b/man/mlr_pipeops_threshold.Rd
@@ -140,6 +140,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_targettrafoscalerange}},
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_tunethreshold.Rd b/man/mlr_pipeops_tunethreshold.Rd
index 7ce2bc4ab..a21c76601 100644
--- a/man/mlr_pipeops_tunethreshold.Rd
+++ b/man/mlr_pipeops_tunethreshold.Rd
@@ -165,6 +165,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_targettrafoscalerange}},
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
new file mode 100644
index 000000000..86b37320a
--- /dev/null
+++ b/man/mlr_pipeops_umap.Rd
@@ -0,0 +1,160 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/PipeOpUMAP.R
+\name{mlr_pipeops_umap}
+\alias{mlr_pipeops_umap}
+\alias{PipeOpUMAP}
+\title{Uniform Manifold Approximation and Projection (UMAP)}
+\format{
+\code{\link{R6Class}} object inheriting from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}.
+}
+\description{
+Carry out dimensionality reduction of a dataset using the Uniform Manifold Approximation and Projection (UMAP).
+See \code{\link[uwot:umap2]{uwot::umap2()}} for details.
+}
+\section{Construction}{
+
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{PipeOpUMAP$new(id = "umap", param_vals = list())
+}\if{html}{\out{</div>}}
+\itemize{
+\item \code{id} :: \code{character(1)}\cr
+Identifier of resulting object, default \code{"umap"}.
+\item \code{param_vals} :: named \code{list}\cr
+List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}.
+}
+}
+
+\section{Input and Output Channels}{
+
+Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}.
+
+The output is the input \code{\link[mlr3:Task]{Task}} with all affected numeric features replaced by their UMAP embedding coordinates.
+}
+
+\section{State}{
+
+The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}, as well as the elements of the class \link[stats:prcomp]{stats::prcomp},
+with the exception of the \verb{$x} slot. These are in particular:
+\itemize{
+\item \code{sdev} :: \code{numeric}\cr
+The standard deviations of the principal components.
+\item \code{rotation} :: \code{matrix}\cr
+The matrix of variable loadings.
+\item \code{center} :: \code{numeric} | \code{logical(1)}\cr
+The centering used, or \code{FALSE}.
+\item \code{scale} :: \code{numeric} | \code{logical(1)}\cr
+The scaling used, or \code{FALSE}.
+}
+}
+
+\section{Parameters}{
+
+The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as:
+\itemize{
+\item \code{center} :: \code{logical(1)}\cr
+Indicating whether the features should be centered. Default is \code{TRUE}. See \code{\link[stats:prcomp]{prcomp()}}.
+\item \code{scale.} :: \code{logical(1)}\cr
+Whether to scale features to unit variance before analysis. Default is \code{FALSE}, but scaling is advisable. See \code{\link[stats:prcomp]{prcomp()}}.
+\item \code{rank.} :: \code{integer(1)}\cr
+Maximal number of principal components to be used. Default is \code{NULL}: use all components. See \code{\link[stats:prcomp]{prcomp()}}.
+}
+}
+
+\section{Internals}{
+
+Uses the \code{\link[uwot:umap2]{umap2()}} function.
+}
+
+\section{Methods}{
+
+Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}.
+}
+
+\examples{
+library("mlr3")
+
+task = tsk("iris")
+pop = po("umap")
+
+task$data()
+pop$train(list(task))[[1]]$data()
+
+pop$state
+}
+\seealso{
+https://mlr-org.com/pipeops.html
+
+Other PipeOps: 
+\code{\link{PipeOp}},
+\code{\link{PipeOpEnsemble}},
+\code{\link{PipeOpImpute}},
+\code{\link{PipeOpTargetTrafo}},
+\code{\link{PipeOpTaskPreproc}},
+\code{\link{PipeOpTaskPreprocSimple}},
+\code{\link{mlr_pipeops}},
+\code{\link{mlr_pipeops_boxcox}},
+\code{\link{mlr_pipeops_branch}},
+\code{\link{mlr_pipeops_chunk}},
+\code{\link{mlr_pipeops_classbalancing}},
+\code{\link{mlr_pipeops_classifavg}},
+\code{\link{mlr_pipeops_classweights}},
+\code{\link{mlr_pipeops_colapply}},
+\code{\link{mlr_pipeops_collapsefactors}},
+\code{\link{mlr_pipeops_colroles}},
+\code{\link{mlr_pipeops_copy}},
+\code{\link{mlr_pipeops_datefeatures}},
+\code{\link{mlr_pipeops_encode}},
+\code{\link{mlr_pipeops_encodeimpact}},
+\code{\link{mlr_pipeops_encodelmer}},
+\code{\link{mlr_pipeops_featureunion}},
+\code{\link{mlr_pipeops_filter}},
+\code{\link{mlr_pipeops_fixfactors}},
+\code{\link{mlr_pipeops_histbin}},
+\code{\link{mlr_pipeops_ica}},
+\code{\link{mlr_pipeops_imputeconstant}},
+\code{\link{mlr_pipeops_imputehist}},
+\code{\link{mlr_pipeops_imputelearner}},
+\code{\link{mlr_pipeops_imputemean}},
+\code{\link{mlr_pipeops_imputemedian}},
+\code{\link{mlr_pipeops_imputemode}},
+\code{\link{mlr_pipeops_imputeoor}},
+\code{\link{mlr_pipeops_imputesample}},
+\code{\link{mlr_pipeops_kernelpca}},
+\code{\link{mlr_pipeops_learner}},
+\code{\link{mlr_pipeops_missind}},
+\code{\link{mlr_pipeops_modelmatrix}},
+\code{\link{mlr_pipeops_multiplicityexply}},
+\code{\link{mlr_pipeops_multiplicityimply}},
+\code{\link{mlr_pipeops_mutate}},
+\code{\link{mlr_pipeops_nmf}},
+\code{\link{mlr_pipeops_nop}},
+\code{\link{mlr_pipeops_ovrsplit}},
+\code{\link{mlr_pipeops_ovrunite}},
+\code{\link{mlr_pipeops_pca}},
+\code{\link{mlr_pipeops_proxy}},
+\code{\link{mlr_pipeops_quantilebin}},
+\code{\link{mlr_pipeops_randomprojection}},
+\code{\link{mlr_pipeops_randomresponse}},
+\code{\link{mlr_pipeops_regravg}},
+\code{\link{mlr_pipeops_removeconstants}},
+\code{\link{mlr_pipeops_renamecolumns}},
+\code{\link{mlr_pipeops_replicate}},
+\code{\link{mlr_pipeops_scale}},
+\code{\link{mlr_pipeops_scalemaxabs}},
+\code{\link{mlr_pipeops_scalerange}},
+\code{\link{mlr_pipeops_select}},
+\code{\link{mlr_pipeops_smote}},
+\code{\link{mlr_pipeops_spatialsign}},
+\code{\link{mlr_pipeops_subsample}},
+\code{\link{mlr_pipeops_targetinvert}},
+\code{\link{mlr_pipeops_targetmutate}},
+\code{\link{mlr_pipeops_targettrafoscalerange}},
+\code{\link{mlr_pipeops_textvectorizer}},
+\code{\link{mlr_pipeops_threshold}},
+\code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_unbranch}},
+\code{\link{mlr_pipeops_updatetarget}},
+\code{\link{mlr_pipeops_vtreat}},
+\code{\link{mlr_pipeops_yeojohnson}}
+}
+\concept{PipeOps}
diff --git a/man/mlr_pipeops_unbranch.Rd b/man/mlr_pipeops_unbranch.Rd
index a6986b956..36cb69044 100644
--- a/man/mlr_pipeops_unbranch.Rd
+++ b/man/mlr_pipeops_unbranch.Rd
@@ -146,6 +146,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
 \code{\link{mlr_pipeops_yeojohnson}}
diff --git a/man/mlr_pipeops_updatetarget.Rd b/man/mlr_pipeops_updatetarget.Rd
index 9e1ae3b06..b553d330a 100644
--- a/man/mlr_pipeops_updatetarget.Rd
+++ b/man/mlr_pipeops_updatetarget.Rd
@@ -161,6 +161,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_vtreat}},
 \code{\link{mlr_pipeops_yeojohnson}}
diff --git a/man/mlr_pipeops_vtreat.Rd b/man/mlr_pipeops_vtreat.Rd
index 67f23519d..c9edef73a 100644
--- a/man/mlr_pipeops_vtreat.Rd
+++ b/man/mlr_pipeops_vtreat.Rd
@@ -214,6 +214,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_yeojohnson}}
diff --git a/man/mlr_pipeops_yeojohnson.Rd b/man/mlr_pipeops_yeojohnson.Rd
index 82284c0d1..912b316c8 100644
--- a/man/mlr_pipeops_yeojohnson.Rd
+++ b/man/mlr_pipeops_yeojohnson.Rd
@@ -151,6 +151,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}}

From 0795faea6d8b8bea90021bf8ebe2e192fae1607e Mon Sep 17 00:00:00 2001
From: Maximilian Muecke <muecke.maximilian@gmail.com>
Date: Sun, 28 Jul 2024 12:25:51 +0200
Subject: [PATCH 03/36] docs: more param docs

---
 R/PipeOpUMAP.R                    | 90 +++++++++++++++++++++++++++----
 man/mlr_pipeops_umap.Rd           | 84 ++++++++++++++++++++++++++---
 tests/testthat/test_pipeop_umap.R |  7 +++
 3 files changed, 165 insertions(+), 16 deletions(-)
 create mode 100644 tests/testthat/test_pipeop_umap.R

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index bb793dac1..2151185dd 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -37,12 +37,84 @@
 #'
 #' @section Parameters:
 #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as:
-#' * `center` :: `logical(1)`\cr
-#'   Indicating whether the features should be centered. Default is `TRUE`. See [`prcomp()`][stats::prcomp].
-#' * `scale.` :: `logical(1)`\cr
-#'   Whether to scale features to unit variance before analysis. Default is `FALSE`, but scaling is advisable. See [`prcomp()`][stats::prcomp].
-#' * `rank.` :: `integer(1)`\cr
-#'   Maximal number of principal components to be used. Default is `NULL`: use all components. See [`prcomp()`][stats::prcomp].
+#' * `n_neighbors` :: `integer(1)`\cr
+#'   Blah
+#' * `n_components` :: `integer(1)`\cr
+#'   Blah
+#' * `metric` :: `character(1)`\cr
+#'   Blah
+#' * `n_epochs` :: `integer(1)`\cr
+#'   Blah
+#' * `learning_rate` :: `numeric(1)`\cr
+#'   Blah
+#' * `init` :: `character(1)`\cr
+#'   Blah
+#' * `init_sdev` :: `character(1)`\cr
+#'   Blah
+#' * `spread` :: `character(1)`\cr
+#'   Blah
+#' * `min_dist` :: `character(1)`\cr
+#'   Blah
+#' * `set_op_mix_ratio` :: `character(1)`\cr
+#'   Blah
+#' * `local_connectivity` :: `character(1)`\cr
+#'   Blah
+#' * `bandwidth` :: `character(1)`\cr
+#'   Blah
+#' * `repulsion_strength` :: `character(1)`\cr
+#'   Blah
+#' * `a` :: `character(1)`\cr
+#'   Blah
+#' * `b` :: `character(1)`\cr
+#'   Blah
+#' * `nn_method` :: `character(1)`\cr
+#'   Blah
+#' * `n_trees` :: `character(1)`\cr
+#'   Blah
+#' * `search_k` :: `character(1)`\cr
+#'   Blah
+#' * `approx_pow` :: `character(1)`\cr
+#'   Blah
+#' * `y` :: `character(1)`\cr
+#'   Blah
+#' * `target_n_neighbors` :: `character(1)`\cr
+#'   Blah
+#' * `target_metric` :: `character(1)`\cr
+#'   Blah
+#' * `target_weight` :: `character(1)`\cr
+#'   Blah
+#' * `pca` :: `character(1)`\cr
+#'   Blah
+#' * `pca_center` :: `character(1)`\cr
+#'   Blah
+#' * `pca_rand` :: `character(1)`\cr
+#'   Blah
+#' * `fast_sgd` :: `character(1)`\cr
+#'   Blah
+#' * `n_threads` :: `character(1)`\cr
+#'   Blah
+#' * `n_sgd_threads` :: `character(1)`\cr
+#'   Blah
+#' * `grain_size` :: `character(1)`\cr
+#'   Blah
+#' * `verbose` :: `character(1)`\cr
+#'   Blah
+#' * `batch` :: `character(1)`\cr
+#'   Blah
+#' * `opt_args` :: `character(1)`\cr
+#'   Blah
+#' * `epoch_callback` :: `character(1)`\cr
+#'   Blah
+#' * `pca_method` :: `character(1)`\cr
+#'   Blah
+#' * `binary_edge_weights` :: `character(1)`\cr
+#'   Blah
+#' * `dens_scale` :: `character(1)`\cr
+#'   Blah
+#' * `seed` :: `character(1)`\cr
+#'   Blah
+#' * `nn_args` :: `character(1)`\cr
+#'   Blah
 #'
 #' @section Internals:
 #' Uses the [`umap()`][uwot::umap] function.
@@ -123,13 +195,13 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         grain_size = p_int(1L, default = 1L, tags = c("train", "umap")),
         verbose = p_lgl(default = TRUE, tags = c("train", "umap")),
         batch = p_lgl(default = FALSE, tags = c("train", "umap")),
-        opt_args = p_uty(default = NULL, tags = c("train", "umap"), custom_check = check_list),
+        opt_args = p_uty(default = NULL, tags = c("train", "umap"), custom_check = crate(function(x) check_list(x, null.ok = TRUE))),
         epoch_callback = p_uty(default = NULL, tags = c("train", "umap"), custom_check = check_function_or_null),
         pca_method = p_fct(c("irlba", "rsvd", "bigstatsr", "svd", "auto"), default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         binary_edge_weights = p_lgl(default = FALSE, tags = c("train", "umap")),
         dens_scale = p_dbl(0, 1, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         seed = p_int(default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
-        nn_args = p_uty(default = NULL, tags = c("train", "umap"), custom_check = check_list)
+        nn_args = p_uty(default = NULL, tags = c("train", "umap"), custom_check = crate(function(x) check_list(x, null.ok = TRUE)))
       )
       ps$set_values(verbose = FALSE)
 
@@ -137,12 +209,10 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
     }
   ),
   private = list(
-
     .train_dt = function(dt, levels, target) {
       params = insert_named(self$param_set$get_values(tags = "umap"), list(ret_model = TRUE))
       umap = invoke(uwot::umap2, dt, .args = params)
       self$state = umap
-      self$state$embedding = NULL
       umap$embedding
     },
 
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index 86b37320a..1e9d052d2 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -51,12 +51,84 @@ The scaling used, or \code{FALSE}.
 
 The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as:
 \itemize{
-\item \code{center} :: \code{logical(1)}\cr
-Indicating whether the features should be centered. Default is \code{TRUE}. See \code{\link[stats:prcomp]{prcomp()}}.
-\item \code{scale.} :: \code{logical(1)}\cr
-Whether to scale features to unit variance before analysis. Default is \code{FALSE}, but scaling is advisable. See \code{\link[stats:prcomp]{prcomp()}}.
-\item \code{rank.} :: \code{integer(1)}\cr
-Maximal number of principal components to be used. Default is \code{NULL}: use all components. See \code{\link[stats:prcomp]{prcomp()}}.
+\item \code{n_neighbors} :: \code{integer(1)}\cr
+Blah
+\item \code{n_components} :: \code{integer(1)}\cr
+Blah
+\item \code{metric} :: \code{character(1)}\cr
+Blah
+\item \code{n_epochs} :: \code{integer(1)}\cr
+Blah
+\item \code{learning_rate} :: \code{numeric(1)}\cr
+Blah
+\item \code{init} :: \code{character(1)}\cr
+Blah
+\item \code{init_sdev} :: \code{character(1)}\cr
+Blah
+\item \code{spread} :: \code{character(1)}\cr
+Blah
+\item \code{min_dist} :: \code{character(1)}\cr
+Blah
+\item \code{set_op_mix_ratio} :: \code{character(1)}\cr
+Blah
+\item \code{local_connectivity} :: \code{character(1)}\cr
+Blah
+\item \code{bandwidth} :: \code{character(1)}\cr
+Blah
+\item \code{repulsion_strength} :: \code{character(1)}\cr
+Blah
+\item \code{a} :: \code{character(1)}\cr
+Blah
+\item \code{b} :: \code{character(1)}\cr
+Blah
+\item \code{nn_method} :: \code{character(1)}\cr
+Blah
+\item \code{n_trees} :: \code{character(1)}\cr
+Blah
+\item \code{search_k} :: \code{character(1)}\cr
+Blah
+\item \code{approx_pow} :: \code{character(1)}\cr
+Blah
+\item \code{y} :: \code{character(1)}\cr
+Blah
+\item \code{target_n_neighbors} :: \code{character(1)}\cr
+Blah
+\item \code{target_metric} :: \code{character(1)}\cr
+Blah
+\item \code{target_weight} :: \code{character(1)}\cr
+Blah
+\item \code{pca} :: \code{character(1)}\cr
+Blah
+\item \code{pca_center} :: \code{character(1)}\cr
+Blah
+\item \code{pca_rand} :: \code{character(1)}\cr
+Blah
+\item \code{fast_sgd} :: \code{character(1)}\cr
+Blah
+\item \code{n_threads} :: \code{character(1)}\cr
+Blah
+\item \code{n_sgd_threads} :: \code{character(1)}\cr
+Blah
+\item \code{grain_size} :: \code{character(1)}\cr
+Blah
+\item \code{verbose} :: \code{character(1)}\cr
+Blah
+\item \code{batch} :: \code{character(1)}\cr
+Blah
+\item \code{opt_args} :: \code{character(1)}\cr
+Blah
+\item \code{epoch_callback} :: \code{character(1)}\cr
+Blah
+\item \code{pca_method} :: \code{character(1)}\cr
+Blah
+\item \code{binary_edge_weights} :: \code{character(1)}\cr
+Blah
+\item \code{dens_scale} :: \code{character(1)}\cr
+Blah
+\item \code{seed} :: \code{character(1)}\cr
+Blah
+\item \code{nn_args} :: \code{character(1)}\cr
+Blah
 }
 }
 
diff --git a/tests/testthat/test_pipeop_umap.R b/tests/testthat/test_pipeop_umap.R
new file mode 100644
index 000000000..0faf5e36c
--- /dev/null
+++ b/tests/testthat/test_pipeop_umap.R
@@ -0,0 +1,7 @@
+context("PipeOpUMAP")
+
+test_that("PipeOpUMAP - basic properties", {
+  op = PipeOpUMAP$new()
+  task = mlr_tasks$get("iris")
+  expect_pipeop(op)
+})

From d1fc20ef9190bc1b6530331113b420ef65d07345 Mon Sep 17 00:00:00 2001
From: Maximilian Muecke <muecke.maximilian@gmail.com>
Date: Tue, 30 Jul 2024 11:00:05 +0200
Subject: [PATCH 04/36] docs: more param docs

---
 R/PipeOpUMAP.R          | 165 ++++++++++++++++++++++------------------
 man/mlr_pipeops_umap.Rd | 159 +++++++++++++++++++++-----------------
 2 files changed, 181 insertions(+), 143 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 2151185dd..b029303a0 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -38,83 +38,102 @@
 #' @section Parameters:
 #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as:
 #' * `n_neighbors` :: `integer(1)`\cr
-#'   Blah
+#'   The size of the neighborhood used for manifold approximation. Default is `15`.
 #' * `n_components` :: `integer(1)`\cr
-#'   Blah
+#'   The dimension of the space to embed into. Default is `2`.
 #' * `metric` :: `character(1)`\cr
-#'   Blah
+#'   Type of distance metric to use to find nearest neighbors. Default is `"euclidean"`.
 #' * `n_epochs` :: `integer(1)`\cr
-#'   Blah
+#'   Number of epochs to use during the optimization of the embedded coordinates.
+#'   By default, this value is set to 500 for datasets containing 10,000 vertices or less,
+#'   and 200 otherwise. If n_epochs = 0, then coordinates determined by "init" will be returned.
 #' * `learning_rate` :: `numeric(1)`\cr
-#'   Blah
-#' * `init` :: `character(1)`\cr
-#'   Blah
-#' * `init_sdev` :: `character(1)`\cr
-#'   Blah
-#' * `spread` :: `character(1)`\cr
-#'   Blah
-#' * `min_dist` :: `character(1)`\cr
-#'   Blah
-#' * `set_op_mix_ratio` :: `character(1)`\cr
-#'   Blah
-#' * `local_connectivity` :: `character(1)`\cr
-#'   Blah
-#' * `bandwidth` :: `character(1)`\cr
-#'   Blah
-#' * `repulsion_strength` :: `character(1)`\cr
-#'   Blah
-#' * `a` :: `character(1)`\cr
-#'   Blah
-#' * `b` :: `character(1)`\cr
-#'   Blah
-#' * `nn_method` :: `character(1)`\cr
-#'   Blah
-#' * `n_trees` :: `character(1)`\cr
-#'   Blah
-#' * `search_k` :: `character(1)`\cr
-#'   Blah
-#' * `approx_pow` :: `character(1)`\cr
-#'   Blah
+#'   Initial learning rate used in optimization of the coordinates. Default is `1`.
+#' * `init` :: `character(1)` | `matrix`\cr
+#'   Type of initialization for the coordinates. Default is `"spectral"`.
+#' * `init_sdev` :: `character(1)` | `numeric(1)`\cr
+#'   Scales each dimension of the initialized coordinates to this standard deviation.
+#'   Default is `"range"`.
+#' * `spread` :: `numeric(1)`\cr
+#'   The effective scale of embedded points. In combination with `min_dist`,
+#'   this determines how clustered/clumped the embedded points are. Default is `1`.
+#' * `min_dist` :: `numeric(1)`\cr
+#'   The effective minimum distance between embedded points. Default is `0.01`.
+#' * `set_op_mix_ratio` :: `numeric(1)`\cr
+#'   Interpolate between (fuzzy) union and intersection as the set operation used to
+#'   combine local fuzzy simplicial sets to obtain a global fuzzy simplicial sets. Default is `1`.
+#' * `local_connectivity` :: `numeric(1)`\cr
+#'   The local connectivity required – i.e. the number of nearest neighbors that should be
+#'   assumed to be connected at a local level. Default is `1`.
+#' * `bandwidth` :: `numeric(1)`\cr
+#'   The effective bandwidth of the kernel if we view the algorithm as similar to Laplacian Eigenmaps.
+#'   Default is `1`.
+#' * `repulsion_strength` :: `numeric(1)`\cr
+#'   Weighting applied to negative samples in low dimensional embedding optimization.
+#'   Values higher than one will result in greater weight being given to negative samples.
+#'   Default is `1`.
+#' * `negative_sample_rate` :: `numeric(1)`\cr
+#'   The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample
+#'   in optimizing the low dimensional embedding. Default is `5`.
+#' * `a` :: `any`\cr
+#'   More specific parameters controlling the embedding.
+#'   If `NULL` these values are set automatically as determined by `min_dist` and `spread`.
+#'   Default is `NULL`.
+#' * `b` :: `any`\cr
+#'   More specific parameters controlling the embedding.
+#'   If `NULL` these values are set automatically as determined by `min_dist` and `spread`.
+#'   Default is `NULL`.
+#' * `nn_method` :: `character(1)` | named `list()` | matrix\cr
+#'   Method for finding nearest neighbors. Default is `NULL`.
+#' * `n_trees` :: `integer(1)`\cr
+#'   Number of trees to build when constructing the nearest neighbor index. Default is `50`.
+#' * `search_k` :: `integer(1)`\cr
+#'   Number of nodes to search during the neighbor retrieval.
+#' * `approx_pow` :: `logical(1)`\cr
+#'   If `TRUE`, use an approximation to the power function in the UMAP gradient.
+#'   Ignored if `dens_scale` is non-NULL. Default is `FALSE`.
 #' * `y` :: `character(1)`\cr
-#'   Blah
-#' * `target_n_neighbors` :: `character(1)`\cr
-#'   Blah
+#'   Default is `NULL`.
+#' * `target_n_neighbors` :: `integer(1)`\cr
+#'   Number of nearest neighbors to use to construct the target simplicial set. Default is `NULL`.
 #' * `target_metric` :: `character(1)`\cr
-#'   Blah
-#' * `target_weight` :: `character(1)`\cr
-#'   Blah
-#' * `pca` :: `character(1)`\cr
-#'   Blah
-#' * `pca_center` :: `character(1)`\cr
-#'   Blah
-#' * `pca_rand` :: `character(1)`\cr
-#'   Blah
-#' * `fast_sgd` :: `character(1)`\cr
-#'   Blah
-#' * `n_threads` :: `character(1)`\cr
-#'   Blah
-#' * `n_sgd_threads` :: `character(1)`\cr
-#'   Blah
-#' * `grain_size` :: `character(1)`\cr
-#'   Blah
-#' * `verbose` :: `character(1)`\cr
-#'   Blah
-#' * `batch` :: `character(1)`\cr
-#'   Blah
-#' * `opt_args` :: `character(1)`\cr
-#'   Blah
-#' * `epoch_callback` :: `character(1)`\cr
-#'   Blah
+#'   The metric used to measure distance for `y` if using supervised dimension reduction.
+#'   Used only if `y` is numeric.
+#' * `target_weight` :: `numeric(1)`\cr
+#'   Weighting factor between data topology and target topology. Default is `0.5`.
+#' * `pca` :: `integer(1)`\cr
+#'   Default is `NULL`.
+#' * `pca_center` :: `logical(1)`\cr
+#'   If `TRUE`, center the columns of X before carrying out PCA.
+#'   For binary data, it's recommended to set this to `FALSE`. Default is `TRUE`.
+#' * `pca_rand` :: `logical(1)`\cr
+#'   Default is `TRUE`.
+#' * `fast_sgd` :: `logical(1)`\cr
+#'   Default is `FALSE`.
+#' * `n_threads` :: `integer(1)`\cr
+#'   Default is `NULL`.
+#' * `n_sgd_threads` :: `integer(1)`\cr
+#'   Default is `0`.
+#' * `grain_size` :: `integer(1)`\cr
+#'   Default is `1`.
+#' * `verbose` :: `logical(1)`\cr
+#'    Should details be logged to the console? Initialzed to `FALSE`.
+#' * `batch` :: `logical(1)`\cr
+#'   Default is `FALSE`.
+#' * `opt_args` :: named `list()`\cr
+#'   Default is `NULL`.
+#' * `epoch_callback` :: `function`\cr
+#'   Default is `NULL`.
 #' * `pca_method` :: `character(1)`\cr
-#'   Blah
-#' * `binary_edge_weights` :: `character(1)`\cr
-#'   Blah
-#' * `dens_scale` :: `character(1)`\cr
-#'   Blah
-#' * `seed` :: `character(1)`\cr
-#'   Blah
-#' * `nn_args` :: `character(1)`\cr
-#'   Blah
+#'   Default is `NULL`.
+#' * `binary_edge_weights` :: `logical(1)`\cr
+#'   Default is `FALSE`.
+#' * `dens_scale` :: `numeric(1)`\cr
+#'   Default is `NULL`.
+#' * `seed` :: `integer(1)`\cr
+#'   Default is `NULL`.
+#' * `nn_args` :: named `list()`\cr
+#'   Default is `NULL`.
 #'
 #' @section Internals:
 #' Uses the [`umap()`][uwot::umap] function.
@@ -164,10 +183,10 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         spread = p_dbl(default = 1, tags = c("train", "umap")),
         min_dist = p_dbl(default = 0.01, tags = c("train", "umap")),
         set_op_mix_ratio = p_dbl(0, 1, default = 1, tags = c("train", "umap")),
-        local_connectivity = p_dbl(1, default = 1L, tags = c("train", "umap")),
+        local_connectivity = p_dbl(1, default = 1, tags = c("train", "umap")),
         bandwidth = p_dbl(default = 1, tags = c("train", "umap")),
         repulsion_strength = p_dbl(default = 1, tags = c("train", "umap")),
-        negative_sample_rate = p_dbl(default = 5L, tags = c("train", "umap")),
+        negative_sample_rate = p_dbl(default = 5, tags = c("train", "umap")),
         a = p_uty(default = NULL, tags = c("train", "umap")),
         b = p_uty(default = NULL, tags = c("train", "umap")),
         nn_method = p_uty(
@@ -186,7 +205,7 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         target_n_neighbors = p_int(tags = c("train", "umap")),
         target_metric = p_fct(c("euclidean", "cosine", "correlation"), default = "euclidean", tags = c("train", "umap")),
         target_weight = p_dbl(0, 1, default = 0.5, tags = c("train", "umap")),
-        pca = p_int(1, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
+        pca = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         pca_center = p_lgl(default = TRUE, tags = c("train", "umap")),
         pca_rand = p_lgl(default = TRUE, tags = c("train", "umap")),
         fast_sgd = p_lgl(default = FALSE, tags = c("train", "umap")),
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index 1e9d052d2..541f57159 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -52,83 +52,102 @@ The scaling used, or \code{FALSE}.
 The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as:
 \itemize{
 \item \code{n_neighbors} :: \code{integer(1)}\cr
-Blah
+The size of the neighborhood used for manifold approximation. Default is \code{15}.
 \item \code{n_components} :: \code{integer(1)}\cr
-Blah
+The dimension of the space to embed into. Default is \code{2}.
 \item \code{metric} :: \code{character(1)}\cr
-Blah
+Type of distance metric to use to find nearest neighbors. Default is \code{"euclidean"}.
 \item \code{n_epochs} :: \code{integer(1)}\cr
-Blah
+Number of epochs to use during the optimization of the embedded coordinates.
+By default, this value is set to 500 for datasets containing 10,000 vertices or less,
+and 200 otherwise. If n_epochs = 0, then coordinates determined by "init" will be returned.
 \item \code{learning_rate} :: \code{numeric(1)}\cr
-Blah
-\item \code{init} :: \code{character(1)}\cr
-Blah
-\item \code{init_sdev} :: \code{character(1)}\cr
-Blah
-\item \code{spread} :: \code{character(1)}\cr
-Blah
-\item \code{min_dist} :: \code{character(1)}\cr
-Blah
-\item \code{set_op_mix_ratio} :: \code{character(1)}\cr
-Blah
-\item \code{local_connectivity} :: \code{character(1)}\cr
-Blah
-\item \code{bandwidth} :: \code{character(1)}\cr
-Blah
-\item \code{repulsion_strength} :: \code{character(1)}\cr
-Blah
-\item \code{a} :: \code{character(1)}\cr
-Blah
-\item \code{b} :: \code{character(1)}\cr
-Blah
-\item \code{nn_method} :: \code{character(1)}\cr
-Blah
-\item \code{n_trees} :: \code{character(1)}\cr
-Blah
-\item \code{search_k} :: \code{character(1)}\cr
-Blah
-\item \code{approx_pow} :: \code{character(1)}\cr
-Blah
+Initial learning rate used in optimization of the coordinates. Default is \code{1}.
+\item \code{init} :: \code{character(1)} | \code{matrix}\cr
+Type of initialization for the coordinates. Default is \code{"spectral"}.
+\item \code{init_sdev} :: \code{character(1)} | \code{numeric(1)}\cr
+Scales each dimension of the initialized coordinates to this standard deviation.
+Default is \code{"range"}.
+\item \code{spread} :: \code{numeric(1)}\cr
+The effective scale of embedded points. In combination with \code{min_dist},
+this determines how clustered/clumped the embedded points are. Default is \code{1}.
+\item \code{min_dist} :: \code{numeric(1)}\cr
+The effective minimum distance between embedded points. Default is \code{0.01}.
+\item \code{set_op_mix_ratio} :: \code{numeric(1)}\cr
+Interpolate between (fuzzy) union and intersection as the set operation used to
+combine local fuzzy simplicial sets to obtain a global fuzzy simplicial sets. Default is \code{1}.
+\item \code{local_connectivity} :: \code{numeric(1)}\cr
+The local connectivity required – i.e. the number of nearest neighbors that should be
+assumed to be connected at a local level. Default is \code{1}.
+\item \code{bandwidth} :: \code{numeric(1)}\cr
+The effective bandwidth of the kernel if we view the algorithm as similar to Laplacian Eigenmaps.
+Default is \code{1}.
+\item \code{repulsion_strength} :: \code{numeric(1)}\cr
+Weighting applied to negative samples in low dimensional embedding optimization.
+Values higher than one will result in greater weight being given to negative samples.
+Default is \code{1}.
+\item \code{negative_sample_rate} :: \code{numeric(1)}\cr
+The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample
+in optimizing the low dimensional embedding. Default is \code{5}.
+\item \code{a} :: \code{any}\cr
+More specific parameters controlling the embedding.
+If \code{NULL} these values are set automatically as determined by \code{min_dist} and \code{spread}.
+Default is \code{NULL}.
+\item \code{b} :: \code{any}\cr
+More specific parameters controlling the embedding.
+If \code{NULL} these values are set automatically as determined by \code{min_dist} and \code{spread}.
+Default is \code{NULL}.
+\item \code{nn_method} :: \code{character(1)} | named \code{list()} | matrix\cr
+Method for finding nearest neighbors. Default is \code{NULL}.
+\item \code{n_trees} :: \code{integer(1)}\cr
+Number of trees to build when constructing the nearest neighbor index. Default is \code{50}.
+\item \code{search_k} :: \code{integer(1)}\cr
+Number of nodes to search during the neighbor retrieval.
+\item \code{approx_pow} :: \code{logical(1)}\cr
+If \code{TRUE}, use an approximation to the power function in the UMAP gradient.
+Ignored if \code{dens_scale} is non-NULL. Default is \code{FALSE}.
 \item \code{y} :: \code{character(1)}\cr
-Blah
-\item \code{target_n_neighbors} :: \code{character(1)}\cr
-Blah
+Default is \code{NULL}.
+\item \code{target_n_neighbors} :: \code{integer(1)}\cr
+Number of nearest neighbors to use to construct the target simplicial set. Default is \code{NULL}.
 \item \code{target_metric} :: \code{character(1)}\cr
-Blah
-\item \code{target_weight} :: \code{character(1)}\cr
-Blah
-\item \code{pca} :: \code{character(1)}\cr
-Blah
-\item \code{pca_center} :: \code{character(1)}\cr
-Blah
-\item \code{pca_rand} :: \code{character(1)}\cr
-Blah
-\item \code{fast_sgd} :: \code{character(1)}\cr
-Blah
-\item \code{n_threads} :: \code{character(1)}\cr
-Blah
-\item \code{n_sgd_threads} :: \code{character(1)}\cr
-Blah
-\item \code{grain_size} :: \code{character(1)}\cr
-Blah
-\item \code{verbose} :: \code{character(1)}\cr
-Blah
-\item \code{batch} :: \code{character(1)}\cr
-Blah
-\item \code{opt_args} :: \code{character(1)}\cr
-Blah
-\item \code{epoch_callback} :: \code{character(1)}\cr
-Blah
+The metric used to measure distance for \code{y} if using supervised dimension reduction.
+Used only if \code{y} is numeric.
+\item \code{target_weight} :: \code{numeric(1)}\cr
+Weighting factor between data topology and target topology. Default is \code{0.5}.
+\item \code{pca} :: \code{integer(1)}\cr
+Default is \code{NULL}.
+\item \code{pca_center} :: \code{logical(1)}\cr
+If \code{TRUE}, center the columns of X before carrying out PCA.
+For binary data, it's recommended to set this to \code{FALSE}. Default is \code{TRUE}.
+\item \code{pca_rand} :: \code{logical(1)}\cr
+Default is \code{TRUE}.
+\item \code{fast_sgd} :: \code{logical(1)}\cr
+Default is \code{FALSE}.
+\item \code{n_threads} :: \code{integer(1)}\cr
+Default is \code{NULL}.
+\item \code{n_sgd_threads} :: \code{integer(1)}\cr
+Default is \code{0}.
+\item \code{grain_size} :: \code{integer(1)}\cr
+Default is \code{1}.
+\item \code{verbose} :: \code{logical(1)}\cr
+Should details be logged to the console? Initialzed to \code{FALSE}.
+\item \code{batch} :: \code{logical(1)}\cr
+Default is \code{FALSE}.
+\item \code{opt_args} :: named \code{list()}\cr
+Default is \code{NULL}.
+\item \code{epoch_callback} :: \code{function}\cr
+Default is \code{NULL}.
 \item \code{pca_method} :: \code{character(1)}\cr
-Blah
-\item \code{binary_edge_weights} :: \code{character(1)}\cr
-Blah
-\item \code{dens_scale} :: \code{character(1)}\cr
-Blah
-\item \code{seed} :: \code{character(1)}\cr
-Blah
-\item \code{nn_args} :: \code{character(1)}\cr
-Blah
+Default is \code{NULL}.
+\item \code{binary_edge_weights} :: \code{logical(1)}\cr
+Default is \code{FALSE}.
+\item \code{dens_scale} :: \code{numeric(1)}\cr
+Default is \code{NULL}.
+\item \code{seed} :: \code{integer(1)}\cr
+Default is \code{NULL}.
+\item \code{nn_args} :: named \code{list()}\cr
+Default is \code{NULL}.
 }
 }
 

From 7fbd496653ea2df96cd280cdb8db5a4fd17e60d7 Mon Sep 17 00:00:00 2001
From: Maximilian Muecke <muecke.maximilian@gmail.com>
Date: Tue, 30 Jul 2024 11:26:29 +0200
Subject: [PATCH 05/36] docs: only run the examples if uwot available

---
 R/PipeOpUMAP.R                    | 2 ++
 man/mlr_pipeops_umap.Rd           | 2 ++
 tests/testthat/test_pipeop_umap.R | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index b029303a0..6c1a53f90 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -142,6 +142,7 @@
 #' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`].
 #'
 #' @examples
+#' \dontshow{ if (requireNamespace("uwot")) \{ }
 #' library("mlr3")
 #'
 #' task = tsk("iris")
@@ -151,6 +152,7 @@
 #' pop$train(list(task))[[1]]$data()
 #'
 #' pop$state
+#' \dontshow{ \} }
 #' @family PipeOps
 #' @template seealso_pipeopslist
 #' @include PipeOpTaskPreproc.R
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index 541f57159..b82b642ad 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -162,6 +162,7 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}
 }
 
 \examples{
+\dontshow{ if (requireNamespace("uwot")) \{ }
 library("mlr3")
 
 task = tsk("iris")
@@ -171,6 +172,7 @@ task$data()
 pop$train(list(task))[[1]]$data()
 
 pop$state
+\dontshow{ \} }
 }
 \seealso{
 https://mlr-org.com/pipeops.html
diff --git a/tests/testthat/test_pipeop_umap.R b/tests/testthat/test_pipeop_umap.R
index 0faf5e36c..eef01e201 100644
--- a/tests/testthat/test_pipeop_umap.R
+++ b/tests/testthat/test_pipeop_umap.R
@@ -1,7 +1,7 @@
 context("PipeOpUMAP")
 
 test_that("PipeOpUMAP - basic properties", {
+  skip_if_not_installed("uwot")
   op = PipeOpUMAP$new()
-  task = mlr_tasks$get("iris")
   expect_pipeop(op)
 })

From 0122c89a901b842b56e189a0a05b09cc37d35340 Mon Sep 17 00:00:00 2001
From: Maximilian Muecke <muecke.maximilian@gmail.com>
Date: Tue, 30 Jul 2024 12:06:39 +0200
Subject: [PATCH 06/36] docs: more docs

---
 R/PipeOpUMAP.R          | 72 +++++++++++++++++++++++++----------------
 man/mlr_pipeops_umap.Rd | 72 +++++++++++++++++++++++++----------------
 2 files changed, 88 insertions(+), 56 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 6c1a53f90..23f08b465 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -24,7 +24,7 @@
 #' The output is the input [`Task`][mlr3::Task] with all affected numeric features replaced by their principal components.
 #'
 #' @section State:
-#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`], as well as the elements of the class [stats::prcomp],
+#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`], as well as the elements of the class [uwot::umap2],
 #' with the exception of the `$x` slot. These are in particular:
 #' * `sdev` :: `numeric`\cr
 #'   The standard deviations of the principal components.
@@ -39,101 +39,117 @@
 #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as:
 #' * `n_neighbors` :: `integer(1)`\cr
 #'   The size of the neighborhood used for manifold approximation. Default is `15`.
+#'   For details see [uwot::umap2()].
 #' * `n_components` :: `integer(1)`\cr
-#'   The dimension of the space to embed into. Default is `2`.
+#'   The dimension of the space to embed into. Default is `2`. For details see [uwot::umap2()].
 #' * `metric` :: `character(1)`\cr
 #'   Type of distance metric to use to find nearest neighbors. Default is `"euclidean"`.
+#'   For details see [uwot::umap2()].
 #' * `n_epochs` :: `integer(1)`\cr
-#'   Number of epochs to use during the optimization of the embedded coordinates.
-#'   By default, this value is set to 500 for datasets containing 10,000 vertices or less,
-#'   and 200 otherwise. If n_epochs = 0, then coordinates determined by "init" will be returned.
+#'   Number of epochs to use during the optimization of the embedded coordinates. Default is `NULL`.
+#'   For details see [uwot::umap2()].
 #' * `learning_rate` :: `numeric(1)`\cr
 #'   Initial learning rate used in optimization of the coordinates. Default is `1`.
+#'   For details see [uwot::umap2()].
 #' * `init` :: `character(1)` | `matrix`\cr
 #'   Type of initialization for the coordinates. Default is `"spectral"`.
+#'   For details see [uwot::umap2()].
 #' * `init_sdev` :: `character(1)` | `numeric(1)`\cr
 #'   Scales each dimension of the initialized coordinates to this standard deviation.
-#'   Default is `"range"`.
+#'   Default is `"range"`. For details see [uwot::umap2()].
 #' * `spread` :: `numeric(1)`\cr
-#'   The effective scale of embedded points. In combination with `min_dist`,
-#'   this determines how clustered/clumped the embedded points are. Default is `1`.
+#'   The effective scale of embedded points. Default is `1`. For details see [uwot::umap2()].
 #' * `min_dist` :: `numeric(1)`\cr
 #'   The effective minimum distance between embedded points. Default is `0.01`.
+#'   For details see [uwot::umap2()].
 #' * `set_op_mix_ratio` :: `numeric(1)`\cr
 #'   Interpolate between (fuzzy) union and intersection as the set operation used to
 #'   combine local fuzzy simplicial sets to obtain a global fuzzy simplicial sets. Default is `1`.
+#'   For details see [uwot::umap2()].
 #' * `local_connectivity` :: `numeric(1)`\cr
 #'   The local connectivity required – i.e. the number of nearest neighbors that should be
-#'   assumed to be connected at a local level. Default is `1`.
+#'   assumed to be connected at a local level. Default is `1`. For details see [uwot::umap2()].
 #' * `bandwidth` :: `numeric(1)`\cr
 #'   The effective bandwidth of the kernel if we view the algorithm as similar to Laplacian Eigenmaps.
-#'   Default is `1`.
+#'   Default is `1`. For details see [uwot::umap2()].
 #' * `repulsion_strength` :: `numeric(1)`\cr
 #'   Weighting applied to negative samples in low dimensional embedding optimization.
 #'   Values higher than one will result in greater weight being given to negative samples.
-#'   Default is `1`.
+#'   Default is `1`. For details see [uwot::umap2()].
 #' * `negative_sample_rate` :: `numeric(1)`\cr
 #'   The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample
-#'   in optimizing the low dimensional embedding. Default is `5`.
+#'   in optimizing the low dimensional embedding. Default is `5`. For details see [uwot::umap2()].
 #' * `a` :: `any`\cr
-#'   More specific parameters controlling the embedding.
-#'   If `NULL` these values are set automatically as determined by `min_dist` and `spread`.
-#'   Default is `NULL`.
+#'   More specific parameters controlling the embedding. Default is `NULL`. For details see [uwot::umap2()].
 #' * `b` :: `any`\cr
-#'   More specific parameters controlling the embedding.
-#'   If `NULL` these values are set automatically as determined by `min_dist` and `spread`.
-#'   Default is `NULL`.
+#'   More specific parameters controlling the embedding. Default is `NULL`. For details see [uwot::umap2()].
 #' * `nn_method` :: `character(1)` | named `list()` | matrix\cr
-#'   Method for finding nearest neighbors. Default is `NULL`.
+#'   Method for finding nearest neighbors. Default is `NULL`. For details see [uwot::umap2()].
 #' * `n_trees` :: `integer(1)`\cr
 #'   Number of trees to build when constructing the nearest neighbor index. Default is `50`.
+#'   For details see [uwot::umap2()].
 #' * `search_k` :: `integer(1)`\cr
-#'   Number of nodes to search during the neighbor retrieval.
+#'   Number of nodes to search during the neighbor retrieval. For details see [uwot::umap2()].
 #' * `approx_pow` :: `logical(1)`\cr
-#'   If `TRUE`, use an approximation to the power function in the UMAP gradient.
-#'   Ignored if `dens_scale` is non-NULL. Default is `FALSE`.
+#'   If `TRUE`, use an approximation to the power function in the UMAP gradient. Default is `FALSE`.
+#'   For details see [uwot::umap2()].
 #' * `y` :: `character(1)`\cr
-#'   Default is `NULL`.
+#'   Default is `NULL`. For details see [uwot::umap2()].
 #' * `target_n_neighbors` :: `integer(1)`\cr
 #'   Number of nearest neighbors to use to construct the target simplicial set. Default is `NULL`.
+#'   For details see [uwot::umap2()].
 #' * `target_metric` :: `character(1)`\cr
 #'   The metric used to measure distance for `y` if using supervised dimension reduction.
-#'   Used only if `y` is numeric.
+#'   For details see [uwot::umap2()].
 #' * `target_weight` :: `numeric(1)`\cr
 #'   Weighting factor between data topology and target topology. Default is `0.5`.
+#'   For details see [uwot::umap2()].
 #' * `pca` :: `integer(1)`\cr
-#'   Default is `NULL`.
+#'   Default is `NULL`. For details see [uwot::umap2()].
 #' * `pca_center` :: `logical(1)`\cr
-#'   If `TRUE`, center the columns of X before carrying out PCA.
-#'   For binary data, it's recommended to set this to `FALSE`. Default is `TRUE`.
+#'   If `TRUE`, center the columns of X before carrying out PCA. Default is `TRUE`.
+#'   For details see [uwot::umap2()].
 #' * `pca_rand` :: `logical(1)`\cr
 #'   Default is `TRUE`.
+#'   For details see [uwot::umap2()].
 #' * `fast_sgd` :: `logical(1)`\cr
 #'   Default is `FALSE`.
+#'   For details see [uwot::umap2()].
 #' * `n_threads` :: `integer(1)`\cr
 #'   Default is `NULL`.
+#'   For details see [uwot::umap2()].
 #' * `n_sgd_threads` :: `integer(1)`\cr
 #'   Default is `0`.
+#'   For details see [uwot::umap2()].
 #' * `grain_size` :: `integer(1)`\cr
 #'   Default is `1`.
+#'   For details see [uwot::umap2()].
 #' * `verbose` :: `logical(1)`\cr
-#'    Should details be logged to the console? Initialzed to `FALSE`.
+#'   Should details be printed? Initialized to `FALSE`. For details see [uwot::umap2()].
 #' * `batch` :: `logical(1)`\cr
 #'   Default is `FALSE`.
+#'   For details see [uwot::umap2()].
 #' * `opt_args` :: named `list()`\cr
 #'   Default is `NULL`.
+#'   For details see [uwot::umap2()].
 #' * `epoch_callback` :: `function`\cr
 #'   Default is `NULL`.
+#'   For details see [uwot::umap2()].
 #' * `pca_method` :: `character(1)`\cr
 #'   Default is `NULL`.
+#'   For details see [uwot::umap2()].
 #' * `binary_edge_weights` :: `logical(1)`\cr
 #'   Default is `FALSE`.
+#'   For details see [uwot::umap2()].
 #' * `dens_scale` :: `numeric(1)`\cr
 #'   Default is `NULL`.
+#'   For details see [uwot::umap2()].
 #' * `seed` :: `integer(1)`\cr
 #'   Default is `NULL`.
+#'   For details see [uwot::umap2()].
 #' * `nn_args` :: named `list()`\cr
 #'   Default is `NULL`.
+#'   For details see [uwot::umap2()].
 #'
 #' @section Internals:
 #' Uses the [`umap()`][uwot::umap] function.
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index b82b642ad..78f79dfd1 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -33,7 +33,7 @@ The output is the input \code{\link[mlr3:Task]{Task}} with all affected numeric
 
 \section{State}{
 
-The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}, as well as the elements of the class \link[stats:prcomp]{stats::prcomp},
+The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}, as well as the elements of the class \link[uwot:umap2]{uwot::umap2},
 with the exception of the \verb{$x} slot. These are in particular:
 \itemize{
 \item \code{sdev} :: \code{numeric}\cr
@@ -53,101 +53,117 @@ The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}
 \itemize{
 \item \code{n_neighbors} :: \code{integer(1)}\cr
 The size of the neighborhood used for manifold approximation. Default is \code{15}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_components} :: \code{integer(1)}\cr
-The dimension of the space to embed into. Default is \code{2}.
+The dimension of the space to embed into. Default is \code{2}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{metric} :: \code{character(1)}\cr
 Type of distance metric to use to find nearest neighbors. Default is \code{"euclidean"}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_epochs} :: \code{integer(1)}\cr
-Number of epochs to use during the optimization of the embedded coordinates.
-By default, this value is set to 500 for datasets containing 10,000 vertices or less,
-and 200 otherwise. If n_epochs = 0, then coordinates determined by "init" will be returned.
+Number of epochs to use during the optimization of the embedded coordinates. Default is \code{NULL}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{learning_rate} :: \code{numeric(1)}\cr
 Initial learning rate used in optimization of the coordinates. Default is \code{1}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{init} :: \code{character(1)} | \code{matrix}\cr
 Type of initialization for the coordinates. Default is \code{"spectral"}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{init_sdev} :: \code{character(1)} | \code{numeric(1)}\cr
 Scales each dimension of the initialized coordinates to this standard deviation.
-Default is \code{"range"}.
+Default is \code{"range"}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{spread} :: \code{numeric(1)}\cr
-The effective scale of embedded points. In combination with \code{min_dist},
-this determines how clustered/clumped the embedded points are. Default is \code{1}.
+The effective scale of embedded points. Default is \code{1}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{min_dist} :: \code{numeric(1)}\cr
 The effective minimum distance between embedded points. Default is \code{0.01}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{set_op_mix_ratio} :: \code{numeric(1)}\cr
 Interpolate between (fuzzy) union and intersection as the set operation used to
 combine local fuzzy simplicial sets to obtain a global fuzzy simplicial sets. Default is \code{1}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{local_connectivity} :: \code{numeric(1)}\cr
 The local connectivity required – i.e. the number of nearest neighbors that should be
-assumed to be connected at a local level. Default is \code{1}.
+assumed to be connected at a local level. Default is \code{1}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{bandwidth} :: \code{numeric(1)}\cr
 The effective bandwidth of the kernel if we view the algorithm as similar to Laplacian Eigenmaps.
-Default is \code{1}.
+Default is \code{1}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{repulsion_strength} :: \code{numeric(1)}\cr
 Weighting applied to negative samples in low dimensional embedding optimization.
 Values higher than one will result in greater weight being given to negative samples.
-Default is \code{1}.
+Default is \code{1}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{negative_sample_rate} :: \code{numeric(1)}\cr
 The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample
-in optimizing the low dimensional embedding. Default is \code{5}.
+in optimizing the low dimensional embedding. Default is \code{5}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{a} :: \code{any}\cr
-More specific parameters controlling the embedding.
-If \code{NULL} these values are set automatically as determined by \code{min_dist} and \code{spread}.
-Default is \code{NULL}.
+More specific parameters controlling the embedding. Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{b} :: \code{any}\cr
-More specific parameters controlling the embedding.
-If \code{NULL} these values are set automatically as determined by \code{min_dist} and \code{spread}.
-Default is \code{NULL}.
+More specific parameters controlling the embedding. Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{nn_method} :: \code{character(1)} | named \code{list()} | matrix\cr
-Method for finding nearest neighbors. Default is \code{NULL}.
+Method for finding nearest neighbors. Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_trees} :: \code{integer(1)}\cr
 Number of trees to build when constructing the nearest neighbor index. Default is \code{50}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{search_k} :: \code{integer(1)}\cr
-Number of nodes to search during the neighbor retrieval.
+Number of nodes to search during the neighbor retrieval. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{approx_pow} :: \code{logical(1)}\cr
-If \code{TRUE}, use an approximation to the power function in the UMAP gradient.
-Ignored if \code{dens_scale} is non-NULL. Default is \code{FALSE}.
+If \code{TRUE}, use an approximation to the power function in the UMAP gradient. Default is \code{FALSE}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{y} :: \code{character(1)}\cr
-Default is \code{NULL}.
+Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{target_n_neighbors} :: \code{integer(1)}\cr
 Number of nearest neighbors to use to construct the target simplicial set. Default is \code{NULL}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{target_metric} :: \code{character(1)}\cr
 The metric used to measure distance for \code{y} if using supervised dimension reduction.
-Used only if \code{y} is numeric.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{target_weight} :: \code{numeric(1)}\cr
 Weighting factor between data topology and target topology. Default is \code{0.5}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca} :: \code{integer(1)}\cr
-Default is \code{NULL}.
+Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca_center} :: \code{logical(1)}\cr
-If \code{TRUE}, center the columns of X before carrying out PCA.
-For binary data, it's recommended to set this to \code{FALSE}. Default is \code{TRUE}.
+If \code{TRUE}, center the columns of X before carrying out PCA. Default is \code{TRUE}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca_rand} :: \code{logical(1)}\cr
 Default is \code{TRUE}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{fast_sgd} :: \code{logical(1)}\cr
 Default is \code{FALSE}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_threads} :: \code{integer(1)}\cr
 Default is \code{NULL}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_sgd_threads} :: \code{integer(1)}\cr
 Default is \code{0}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{grain_size} :: \code{integer(1)}\cr
 Default is \code{1}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{verbose} :: \code{logical(1)}\cr
-Should details be logged to the console? Initialzed to \code{FALSE}.
+Should details be printed? Initialized to \code{FALSE}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{batch} :: \code{logical(1)}\cr
 Default is \code{FALSE}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{opt_args} :: named \code{list()}\cr
 Default is \code{NULL}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{epoch_callback} :: \code{function}\cr
 Default is \code{NULL}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca_method} :: \code{character(1)}\cr
 Default is \code{NULL}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{binary_edge_weights} :: \code{logical(1)}\cr
 Default is \code{FALSE}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{dens_scale} :: \code{numeric(1)}\cr
 Default is \code{NULL}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{seed} :: \code{integer(1)}\cr
 Default is \code{NULL}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{nn_args} :: named \code{list()}\cr
 Default is \code{NULL}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 }
 }
 

From 0017c4e0918f675cb7e4ac2a34cfaf421710c6f4 Mon Sep 17 00:00:00 2001
From: Maximilian Muecke <muecke.maximilian@gmail.com>
Date: Tue, 30 Jul 2024 12:20:42 +0200
Subject: [PATCH 07/36] docs: finish param docs

---
 R/PipeOpUMAP.R          | 54 +++++++++++++++++++++------------------
 man/mlr_pipeops_umap.Rd | 56 +++++++++++++++++++++++------------------
 2 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 23f08b465..a15e0774f 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -74,7 +74,6 @@
 #'   Default is `1`. For details see [uwot::umap2()].
 #' * `repulsion_strength` :: `numeric(1)`\cr
 #'   Weighting applied to negative samples in low dimensional embedding optimization.
-#'   Values higher than one will result in greater weight being given to negative samples.
 #'   Default is `1`. For details see [uwot::umap2()].
 #' * `negative_sample_rate` :: `numeric(1)`\cr
 #'   The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample
@@ -93,8 +92,9 @@
 #' * `approx_pow` :: `logical(1)`\cr
 #'   If `TRUE`, use an approximation to the power function in the UMAP gradient. Default is `FALSE`.
 #'   For details see [uwot::umap2()].
-#' * `y` :: `character(1)`\cr
-#'   Default is `NULL`. For details see [uwot::umap2()].
+#' * `y` :: `any`\cr
+#'   Optional target data for supervised dimension reduction. Default is `NULL`.
+#'   For details see [uwot::umap2()].
 #' * `target_n_neighbors` :: `integer(1)`\cr
 #'   Number of nearest neighbors to use to construct the target simplicial set. Default is `NULL`.
 #'   For details see [uwot::umap2()].
@@ -105,51 +105,55 @@
 #'   Weighting factor between data topology and target topology. Default is `0.5`.
 #'   For details see [uwot::umap2()].
 #' * `pca` :: `integer(1)`\cr
-#'   Default is `NULL`. For details see [uwot::umap2()].
+#'   Reduce data to this number of columns using PCA. Default is `NULL`.
+#'   For details see [uwot::umap2()].
 #' * `pca_center` :: `logical(1)`\cr
 #'   If `TRUE`, center the columns of X before carrying out PCA. Default is `TRUE`.
 #'   For details see [uwot::umap2()].
 #' * `pca_rand` :: `logical(1)`\cr
-#'   Default is `TRUE`.
-#'   For details see [uwot::umap2()].
+#'   If `TRUE`, use the PCG random number generator (O'Neill, 2014) during optimization.
+#'   Otherwise, use the faster (but probably less statistically good) Tausworthe "taus88" generator.
+#'   Default is `TRUE`. For details see [uwot::umap2()].
 #' * `fast_sgd` :: `logical(1)`\cr
-#'   Default is `FALSE`.
-#'   For details see [uwot::umap2()].
+#'   If `TRUE`, then the following combination of parameters is set:
+#'   * `pcg_rand = TRUE`
+#'   * `n_sgd_threads = "auto"`
+#'   * `approx_pow = TRUE`
+#'   Default is `FALSE`. For details see [uwot::umap2()].
 #' * `n_threads` :: `integer(1)`\cr
-#'   Default is `NULL`.
-#'   For details see [uwot::umap2()].
+#'   Number of threads to use. Default is `NULL`. For details see [uwot::umap2()].
 #' * `n_sgd_threads` :: `integer(1)`\cr
-#'   Default is `0`.
+#'   Number of threads to use during stochastic gradient descent. Default is `0`.
 #'   For details see [uwot::umap2()].
 #' * `grain_size` :: `integer(1)`\cr
-#'   Default is `1`.
+#'   The minimum amount of work to do on each thread. Default is `1`.
 #'   For details see [uwot::umap2()].
 #' * `verbose` :: `logical(1)`\cr
 #'   Should details be printed? Initialzed to `FALSE`. For details see [uwot::umap2()].
 #' * `batch` :: `logical(1)`\cr
-#'   Default is `FALSE`.
-#'   For details see [uwot::umap2()].
+#'   If `TRUE`, then embedding coordinates are updated at the end of each epoch rather
+#'   than during the epoch. Default is `FALSE`. For details see [uwot::umap2()].
 #' * `opt_args` :: named `list()`\cr
-#'   Default is `NULL`.
+#'   A list of optimizer parameters, used when `batch = TRUE`. Default is `NULL`.
 #'   For details see [uwot::umap2()].
 #' * `epoch_callback` :: `function`\cr
-#'   Default is `NULL`.
+#'   A function which will be invoked at the end of every epoch. Default is `NULL`.
 #'   For details see [uwot::umap2()].
 #' * `pca_method` :: `character(1)`\cr
-#'   Default is `NULL`.
-#'   For details see [uwot::umap2()].
+#'   Method to carry out any PCA dimensionality reduction when the `pca` is specified.
+#'   Default is `NULL`. For details see [uwot::umap2()].
 #' * `binary_edge_weights` :: `logical(1)`\cr
-#'   Default is `FALSE`.
-#'   For details see [uwot::umap2()].
+#'   If TRUE then edge weights in the input graph are treated as binary (0/1) rather than real valued.
+#'   Default is `FALSE`. For details see [uwot::umap2()].
 #' * `dens_scale` :: `numeric(1)`\cr
-#'   Default is `NULL`.
+#'   A scaling factor to apply to the density of the input data. Default is `NULL`.
 #'   For details see [uwot::umap2()].
 #' * `seed` :: `integer(1)`\cr
-#'   Default is `NULL`.
-#'   For details see [uwot::umap2()].
+#'   Integer seed to use to initialize the random number generator state.
+#'   Default is `NULL`. For details see [uwot::umap2()].
 #' * `nn_args` :: named `list()`\cr
-#'   Default is `NULL`.
-#'   For details see [uwot::umap2()].
+#'   A list containing additional arguments to pass to the nearest neighbor method.
+#'   Default is `NULL`. For details see [uwot::umap2()].
 #'
 #' @section Internals:
 #' Uses the [`umap()`][uwot::umap] function.
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index 78f79dfd1..8b87a30ac 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -88,7 +88,6 @@ The effective bandwidth of the kernel if we view the algorithm as similar to Lap
 Default is \code{1}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{repulsion_strength} :: \code{numeric(1)}\cr
 Weighting applied to negative samples in low dimensional embedding optimization.
-Values higher than one will result in greater weight being given to negative samples.
 Default is \code{1}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{negative_sample_rate} :: \code{numeric(1)}\cr
 The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample
@@ -107,8 +106,9 @@ Number of nodes to search during the neighbor retrieval. For details see \code{\
 \item \code{approx_pow} :: \code{logical(1)}\cr
 If \code{TRUE}, use an approximation to the power function in the UMAP gradient. Default is \code{FALSE}.
 For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
-\item \code{y} :: \code{character(1)}\cr
-Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+\item \code{y} :: \code{any}\cr
+Optional target data for supervised dimension reduction. Default is \code{NULL}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{target_n_neighbors} :: \code{integer(1)}\cr
 Number of nearest neighbors to use to construct the target simplicial set. Default is \code{NULL}.
 For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
@@ -119,51 +119,57 @@ For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 Weighting factor between data topology and target topology. Default is \code{0.5}.
 For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca} :: \code{integer(1)}\cr
-Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Reduce data to this number of columns using PCA. Default is \code{NULL}.
+For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca_center} :: \code{logical(1)}\cr
 If \code{TRUE}, center the columns of X before carrying out PCA. Default is \code{TRUE}.
 For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca_rand} :: \code{logical(1)}\cr
-Default is \code{TRUE}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+If \code{TRUE}, use the PCG random number generator (O'Neill, 2014) during optimization.
+Otherwise, use the faster (but probably less statistically good) Tausworthe "taus88" generator.
+Default is \code{TRUE}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{fast_sgd} :: \code{logical(1)}\cr
-Default is \code{FALSE}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+If \code{TRUE}, then the following combination of parameters is set:
+\itemize{
+\item \code{pcg_rand = TRUE}
+\item \code{n_sgd_threads = "auto"}
+\item \code{approx_pow = TRUE}
+}
+Default is \code{FALSE}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_threads} :: \code{integer(1)}\cr
-Default is \code{NULL}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Number of threads to use. Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_sgd_threads} :: \code{integer(1)}\cr
-Default is \code{0}.
+Number of threads to use during stochastic gradient descent. Default is \code{0}.
 For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{grain_size} :: \code{integer(1)}\cr
-Default is \code{1}.
+The minimum amount of work to do on each thread. Default is \code{1}.
 For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{verbose} :: \code{logical(1)}\cr
 Should details be printed? Initialzed to \code{FALSE}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{batch} :: \code{logical(1)}\cr
-Default is \code{FALSE}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+If \code{TRUE}, then embedding coordinates are updated at the end of each epoch rather
+than during the epoch. Default is \code{FALSE}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{opt_args} :: named \code{list()}\cr
-Default is \code{NULL}.
+A list of optimizer parameters, used when \code{batch = TRUE}. Default is \code{NULL}.
 For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{epoch_callback} :: \code{function}\cr
-Default is \code{NULL}.
+A function which will be invoked at the end of every epoch. Default is \code{NULL}.
 For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca_method} :: \code{character(1)}\cr
-Default is \code{NULL}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Method to carry out any PCA dimensionality reduction when the \code{pca} is specified.
+Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{binary_edge_weights} :: \code{logical(1)}\cr
-Default is \code{FALSE}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+If TRUE then edge weights in the input graph are treated as binary (0/1) rather than real valued.
+Default is \code{FALSE}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{dens_scale} :: \code{numeric(1)}\cr
-Default is \code{NULL}.
+A scaling factor to apply to the density of the input data. Default is \code{NULL}.
 For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{seed} :: \code{integer(1)}\cr
-Default is \code{NULL}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Integer seed to use to initialize the random number generator state.
+Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{nn_args} :: named \code{list()}\cr
-Default is \code{NULL}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+A list containing additional arguments to pass to the nearest neighbor method.
+Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
 }
 }
 

From 7a17a3a733e33caed992ac83535ec7963f51224f Mon Sep 17 00:00:00 2001
From: Maximilian Muecke <muecke.maximilian@gmail.com>
Date: Tue, 30 Jul 2024 12:49:11 +0200
Subject: [PATCH 08/36] docs: init docs for state

---
 R/PipeOpUMAP.R          | 67 +++++++++++++++++++++++++++++++++++------
 man/mlr_pipeops_umap.Rd | 66 ++++++++++++++++++++++++++++++++++------
 2 files changed, 113 insertions(+), 20 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index a15e0774f..d23f1222a 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -24,16 +24,62 @@
 #' The output is the input [`Task`][mlr3::Task] with all affected numeric features replaced by their principal components.
 #'
 #' @section State:
-#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`], as well as the elements of the class [uwot::umap2],
-#' with the exception of the `$x` slot. These are in particular:
-#' * `sdev` :: `numeric`\cr
-#'   The standard deviations of the principal components.
-#' * `rotation` :: `matrix`\cr
-#'   The matrix of variable loadings.
-#' * `center` :: `numeric` | `logical(1)`\cr
-#'   The centering used, or `FALSE`.
-#' * `scale` :: `numeric` | `logical(1)`\cr
-#'   The scaling used, or `FALSE`.
+#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`], as well as the elements of the class [uwot::umap2].
+#' These are in particular:
+#' * `embedding` :: `matrix`\cr
+#'   Blah
+#' * `scale_info` :: `any`\cr
+#'   Blah
+#' * `search_k` :: `numeric(1)`\cr
+#'   Blah
+#' * `local_connectivity` :: `numeric(1)`\cr
+#'   Blah
+#' * `n_epochs` :: `numeric(1)`\cr
+#'   Blah
+#' * `alpha` :: `numeric(1)`\cr
+#'   Blah
+#' * `negative_sample_rate` :: `numeric(1)`\cr
+#'   Blah
+#' * `method` :: `character(1)`\cr
+#'   Blah
+#' * `a` :: named `numeric(1)`\cr
+#'   Blah
+#' * `b` :: named `numeric(1)`\cr
+#'   Blah
+#' * `gamma` :: `numeric(1)`\cr
+#'   Blah
+#' * `approx_pow` :: `logical(1)`\cr
+#'   Blah
+#' * `metric` :: named `list()`\cr
+#'   Blah
+#' * `norig_col` :: `integer(1)`\cr
+#'   Blah
+#' * `pcg_rand` :: `logical(1)`\cr
+#'   Blah
+#' * `batch` :: `logical(1)`\cr
+#'   Blah
+#' * `opt_args` :: named `list()`\cr
+#'   Blah
+#' * `num_precomputed_nns` :: `numeric(1)`\cr
+#'   Blah
+#' * `min_dist` :: `numeric(1)`\cr
+#'   Blah
+#' * `spread` :: `numeric(1)`\cr
+#'   Blah
+#' * `binary_edge_weights` :: `logical(1)`\cr
+#'   Blah
+#' * `seed` :: `integer(1)`\cr
+#'   Blah
+#' * `nn_method` :: `any`\cr
+#'   Blah
+#' * `nn_args` :: `list()`\cr
+#'   Blah
+#' * `n_neighbors` :: `numeric(1)`\cr
+#'   Blah
+#' * `nn_index` :: named `list()`\cr
+#'   Blah
+#' * `pca_models` :: `list()`\cr
+#'   Blah
 #'
 #' @section Parameters:
 #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as:
@@ -253,6 +299,7 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
     .train_dt = function(dt, levels, target) {
       params = insert_named(self$param_set$get_values(tags = "umap"), list(ret_model = TRUE))
       umap = invoke(uwot::umap2, dt, .args = params)
+      browser()
       self$state = umap
       umap$embedding
     },
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index 8b87a30ac..134824f43 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -33,17 +33,63 @@ The output is the input \code{\link[mlr3:Task]{Task}} with all affected numeric
 
 \section{State}{
 
-The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}, as well as the elements of the class \link[uwot:umap2]{uwot::umap2},
-with the exception of the \verb{$x} slot. These are in particular:
+The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}, as well as the elements of the class \link[uwot:umap2]{uwot::umap2}.
+These are in particular:
 \itemize{
-\item \code{sdev} :: \code{numeric}\cr
-The standard deviations of the principal components.
-\item \code{rotation} :: \code{matrix}\cr
-The matrix of variable loadings.
-\item \code{center} :: \code{numeric} | \code{logical(1)}\cr
-The centering used, or \code{FALSE}.
-\item \code{scale} :: \code{numeric} | \code{logical(1)}\cr
-The scaling used, or \code{FALSE}.
+\item \code{embedding} :: \code{matrix}\cr
+Blah
+\item \code{scale_info} :: \code{any}\cr
+Blah
+\item \code{search_k} :: \code{numeric(1)}\cr
+Blah
+\item \code{local_connectivity} :: \code{numeric(1)}\cr
+Blah
+\item \code{n_epochs} :: \code{numeric(1)}\cr
+Blah
+\item \code{alpha} :: \code{numeric(1)}\cr
+Blah
+\item \code{negative_sample_rate} :: \code{numeric(1)}\cr
+Blah
+\item \code{method} :: \code{character(1)}\cr
+Blah
+\item \code{a} :: named \code{numeric(1)}\cr
+Blah
+\item \code{b} :: named \code{numeric(1)}\cr
+Blah
+\item \code{gamma} :: \code{numeric(1)}\cr
+Blah
+\item \code{approx_pow} :: \code{logical(1)}\cr
+Blah
+\item \code{metric} :: named \code{list()}\cr
+Blah
+\item \code{norig_col} :: \code{integer(1)}\cr
+Blah
+\item \code{pcg_rand} :: \code{logical(1)}\cr
+Blah
+\item \code{batch} :: \code{logical(1)}\cr
+Blah
+\item \code{opt_args} :: named \code{list()}\cr
+Blah
+\item \code{num_precomputed_nns} :: \code{numeric(1)}\cr
+Blah
+\item \code{min_dist} :: \code{numeric(1)}\cr
+Blah
+\item \code{spread} :: \code{numeric(1)}\cr
+Blah
+\item \code{binary_edge_weights} :: \code{logical(1)}\cr
+Blah
+\item \code{seed} :: \code{integer(1)}\cr
+Blah
+\item \code{nn_method} :: \code{any}\cr
+Blah
+\item \code{nn_args} :: \code{list()}\cr
+Blah
+\item \code{n_neighbors} :: \code{numeric(1)}\cr
+Blah
+\item \code{nn_index} :: named \code{list()}\cr
+Blah
+\item \code{pca_models} :: \code{list()}\cr
+Blah
 }
 }
 

From 4188dcde17f3024345e377929af2f934f979fec0 Mon Sep 17 00:00:00 2001
From: Maximilian Muecke <muecke.maximilian@gmail.com>
Date: Tue, 30 Jul 2024 17:00:21 +0200
Subject: [PATCH 09/36] fix: remove browser call

---
 R/PipeOpUMAP.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index d23f1222a..e6ff02c61 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -299,7 +299,6 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
     .train_dt = function(dt, levels, target) {
       params = insert_named(self$param_set$get_values(tags = "umap"), list(ret_model = TRUE))
       umap = invoke(uwot::umap2, dt, .args = params)
-      browser()
       self$state = umap
       umap$embedding
     },

From 56e43fe2fcae286c88e13d0be596cda22849adfb Mon Sep 17 00:00:00 2001
From: Maximilian Muecke <muecke.maximilian@gmail.com>
Date: Tue, 30 Jul 2024 20:00:25 +0200
Subject: [PATCH 10/36] feat(umap): allow more metrics and remove depend

---
 R/PipeOpUMAP.R | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index e6ff02c61..90107ad0f 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -231,10 +231,14 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         n_neighbors = p_int(2L, 100L, default = 15L, tags = c("train", "umap")),
         n_components = p_int(1L, 100L, default = 2L, tags = c("train", "umap")),
         metric = p_fct(
-          c("euclidean", "cosine", "manhattan", "hamming", "correlation", "categorical"),
+          levels = c(
+            "euclidean", "cosine", "manhattan", "hamming", "correlation", "categorical",
+            "braycurtis", "canberra", "chebyshev", "dice", "hamming", "hellinger", "jaccard",
+            "jensenshannon", "kulsinski", "rogerstanimoto", "russellrao", "sokalmichener",
+            "sokalsneath", "spearmanr", "symmetrickl", "tsss", "yule"
+          ),
           default = "euclidean",
-          tags = c("train", "umap"),
-          depends = quote(nn_method == "hnsw")
+          tags = c("train", "umap")
         ),
         n_epochs = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         learning_rate = p_dbl(0, default = 1, tags = c("train", "umap")),

From 9ae3780783916eb733918f116eab3bd67c118992 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 30 Jul 2024 20:11:41 +0200
Subject: [PATCH 11/36] docs: added most state parameters

---
 R/PipeOpUMAP.R | 157 +++++++++++++++++++++++++++----------------------
 1 file changed, 86 insertions(+), 71 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index d23f1222a..0ca74f894 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -6,7 +6,7 @@
 #'
 #' @description
 #' Carry out dimensionality reduction of a dataset using the Uniform Manifold Approximation and Projection (UMAP).
-#' See [uwot::umap2()] for details.
+#' See [uwot::umap2()] for details.
 #'
 #' @section Construction:
 #' ```
@@ -24,58 +24,65 @@
 #' The output is the input [`Task`][mlr3::Task] with all affected numeric features replaced by their principal components.
 #'
 #' @section State:
-#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`], as well as the elements of the class [uwot::umap2].
+#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`], as well as the elements of the list
+#' returned from [uwot::umap2].
 #' These are in particular:
 #' * `embedding` :: `matrix`\cr
-#'   Blah
+#'   Matrix of embedded coordinates.
 #' * `scale_info` :: `any`\cr
 #'   Blah
 #' * `search_k` :: `numeric(1)`\cr
-#'   Blah
+#'   Number of nodes searched during the neighbor retrieval. Only used if the `nn_method` is `"annoy"`.
+#'   For details, see [uwot::umap2()].
 #' * `local_connectivity` :: `numeric(1)`\cr
-#'   Blah
+#'   Used local connectivity – i.e. the number of nearest neighbors that should be
+#'   assumed to be connected at a local level. For details, see [uwot::umap2()].
 #' * `n_epochs` :: `numeric(1)`\cr
-#'   Blah
+#'   Number of epochs used during the optimization of the embedded coordinates. For details, see [uwot::umap2()].
 #' * `alpha` :: `numeric(1)`\cr
-#'   Blah
+#'   Initial learning rate. For details, see [uwot::umap2()].
 #' * `negative_sample_rate` :: `numeric(1)`\cr
-#'   Blah
+#'   The number of negative edge/1-simplex samples used per positive edge/1-simplex sample
+#'   in optimizing the low dimensional embedding. For details, see [uwot::umap2()].
 #' * `method` :: `character(1)`\cr
 #'   Blah
 #' * `a` :: named `numeric(1)`\cr
-#'   Blah
+#'   More specific parameters controlling the embedding. For details, see [uwot::umap2()].
 #' * `b` :: named `numeric(1)`\cr
-#'   Blah
+#'   More specific parameters controlling the embedding. For details, see [uwot::umap2()].
 #' * `gamma` :: `numeric(1)`\cr
 #'   Blah
 #' * `approx_pow` :: `logical(1)`\cr
-#'   Blah
+#'   If `TRUE`, use an approximation to the power function in the UMAP gradient. For details, see [uwot::umap2()].
 #' * `metric` :: named `list()`\cr
-#'   Blah
+#'   Type of distance metric used to find nearest neighbors. For details, see [uwot::umap2()].
 #' * `norig_col` :: `integer(1)`\cr
-#'   Blah
+#'   Number of original columns.
 #' * `pcg_rand` :: `logical(1)`\cr
-#'   Blah
+#'   `TRUE`, if the PCG random number generator (O'Neill, 2014) was used during optimization.
+#'   Otherwise, Tausworthe "taus88" generator was used. For details, see [uwot::umap2()].
 #' * `batch` :: `logical(1)`\cr
-#'   Blah
+#'   `TRUE`, if embedding coordinates were updated at the end of each epoch rather
+#'   than during the epoch. For details, see [uwot::umap2()].
 #' * `opt_args` :: named `list()`\cr
-#'   Blah
+#'   Optimizer parameters, used when `batch = TRUE`. For details, see [uwot::umap2()].
 #' * `num_precomputed_nns` :: `numeric(1)`\cr
 #'   Blah
 #' * `min_dist` :: `numeric(1)`\cr
-#'   Blah
+#'   The effective minimum distance between embedded points. For details, see [uwot::umap2()].
 #' * `spread` :: `numeric(1)`\cr
-#'   Blah
+#'   The effective scale of embedded points. For details, see [uwot::umap2()].
 #' * `binary_edge_weights` :: `logical(1)`\cr
-#'   Blah
+#'   If `TRUE` then edge weights in the input graph were treated as binary (0/1) rather than real valued.
+#'   For details, see [uwot::umap2()].
 #' * `seed` :: `integer(1)`\cr
 #'   Blah
 #' * `nn_method` :: `any`\cr
-#'   Blah
+#'   Method for finding nearest neighbors. For details, see [uwot::umap2()].
 #' * `nn_args` :: `list()`\cr
 #'   Blah
 #' * `n_neighbors` :: `numeric(1)`\cr
-#'   Blah
+#'   The size of the neighborhood used for manifold approximation. For details, see [uwot::umap2()].
 #' * `nn_index` :: named `list()`\cr
 #'   Blah
 #' * `pca_models` :: `list()`\cr
@@ -85,124 +92,125 @@
 #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as:
 #' * `n_neighbors` :: `integer(1)`\cr
 #'   The size of the neighborhood used for manifold approximation. Default is `15`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `n_components` :: `integer(1)`\cr
-#'   The dimension of the space to embed into. Default is `2`. For details see [uwot::umap2()].
+#'   The dimension of the space to embed into. Default is `2`. For details, see [uwot::umap2()].
 #' * `metric` :: `character(1)`\cr
 #'   Type of distance metric to use to find nearest neighbors. Default is `"euclidean"`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `n_epochs` :: `integer(1)`\cr
 #'   Number of epochs to use during the optimization of the embedded coordinates. Default is `NULL`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `learning_rate` :: `numeric(1)`\cr
 #'   Initial learning rate used in optimization of the coordinates. Default is `1`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `init` :: `character(1)` | `matrix`\cr
 #'   Type of initialization for the coordinates. Default is `"spectral"`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `init_sdev` :: `character(1)` | `numeric(1)`\cr
 #'   Scales each dimension of the initialized coordinates to this standard deviation.
-#'   Default is `"range"`. For details see [uwot::umap2()].
+#'   Default is `"range"`. For details, see [uwot::umap2()].
 #' * `spread` :: `numeric(1)`\cr
-#'   The effective scale of embedded points. Default is `1`. For details see [uwot::umap2()].
+#'   The effective scale of embedded points. Default is `1`. For details, see [uwot::umap2()].
 #' * `min_dist` :: `numeric(1)`\cr
 #'   The effective minimum distance between embedded points. Default is `0.01`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `set_op_mix_ratio` :: `numeric(1)`\cr
 #'   Interpolate between (fuzzy) union and intersection as the set operation used to
 #'   combine local fuzzy simplicial sets to obtain a global fuzzy simplicial sets. Default is `1`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `local_connectivity` :: `numeric(1)`\cr
 #'   The local connectivity required – i.e. the number of nearest neighbors that should be
-#'   assumed to be connected at a local level. Default is `1`. For details see [uwot::umap2()].
+#'   assumed to be connected at a local level. Default is `1`. For details, see [uwot::umap2()].
 #' * `bandwidth` :: `numeric(1)`\cr
 #'   The effective bandwidth of the kernel if we view the algorithm as similar to Laplacian Eigenmaps.
-#'   Default is `1`. For details see [uwot::umap2()].
+#'   Default is `1`. For details, see [uwot::umap2()].
 #' * `repulsion_strength` :: `numeric(1)`\cr
 #'   Weighting applied to negative samples in low dimensional embedding optimization.
-#'   Default is `1`. For details see [uwot::umap2()].
+#'   Default is `1`. For details, see [uwot::umap2()].
 #' * `negative_sample_rate` :: `numeric(1)`\cr
 #'   The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample
-#'   in optimizing the low dimensional embedding. Default is `5`. For details see [uwot::umap2()].
+#'   in optimizing the low dimensional embedding. Default is `5`. For details, see [uwot::umap2()].
 #' * `a` :: `any`\cr
-#'   More specific parameters controlling the embedding. Default is `NULL`. For details see [uwot::umap2()].
+#'   More specific parameters controlling the embedding. Default is `NULL`. For details, see [uwot::umap2()].
 #' * `b` :: `any`\cr
-#'   More specific parameters controlling the embedding. Default is `NULL`. For details see [uwot::umap2()].
+#'   More specific parameters controlling the embedding. Default is `NULL`. For details, see [uwot::umap2()].
 #' * `nn_method` :: `character(1)` | named `list()` | matrix\cr
-#'   Method for finding nearest neighbors. Default is `NULL`. For details see [uwot::umap2()].
+#'   Method for finding nearest neighbors. Default is `NULL`. For details, see [uwot::umap2()].
 #' * `n_trees` :: `integer(1)`\cr
 #'   Number of trees to build when constructing the nearest neighbor index. Default is `50`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `search_k` :: `integer(1)`\cr
-#'   Number of nodes to search during the neighbor retrieval. For details see [uwot::umap2()].
+#'   Number of nodes to search during the neighbor retrieval. Only used if the `nn_method` is `"annoy"`.
+#'   For details, see [uwot::umap2()].
 #' * `approx_pow` :: `logical(1)`\cr
 #'   If `TRUE`, use an approximation to the power function in the UMAP gradient. Default is `FALSE`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `y` :: `any`\cr
 #'   Optional target data for supervised dimension reduction. Default is `NULL`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `target_n_neighbors` :: `integer(1)`\cr
-#'   Number of nearest neighbors to use to construct the target simplicial set. Default is `NULL`.
-#'   For details see [uwot::umap2()].
+#'   Number of nearest neighbors to use to construct the target simplicial set. Default is `n_neighbors`.
+#'   For details, see [uwot::umap2()].
 #' * `target_metric` :: `character(1)`\cr
 #'   The metric used to measure distance for `y` if using supervised dimension reduction.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `target_weight` :: `numeric(1)`\cr
 #'   Weighting factor between data topology and target topology. Default is `0.5`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `pca` :: `integer(1)`\cr
 #'   Redude data to this number of columns using PCA. Default is `NULL`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `pca_center` :: `logical(1)`\cr
 #'   If `TRUE`, center the columns of X before carrying out PCA. Default is `TRUE`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `pca_rand` :: `logical(1)`\cr
 #'   If `TRUE`, use the PCG random number generator (O'Neill, 2014) during optimization.
 #'   Otherwise, use the faster (but probably less statistically good) Tausworthe "taus88" generator.
-#'   Default is `TRUE`. For details see [uwot::umap2()].
+#'   Default is `TRUE`. For details, see [uwot::umap2()].
 #' * `fast_sgd` :: `logical(1)`\cr
 #'   If `TRUE`, then the following combination of parameters is set:
 #'   * `pcg_rand = TRUE`
 #'   * `n_sgd_threads = "auto"`
 #'   * `approx_pow = TRUE`
-#'   Default is `FALSE`. For details see [uwot::umap2()].
+#'   Default is `FALSE`. For details, see [uwot::umap2()].
 #' * `n_threads` :: `integer(1)`\cr
-#'   Number of threads to use. Default is `NULL`. For details see [uwot::umap2()].
+#'   Number of threads to use. Default is `NULL`. For details, see [uwot::umap2()].
 #' * `n_sgd_threads` :: `integer(1)`\cr
 #'   Number of threads to use during stochastic gradient descent. Default is `0`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `grain_size` :: `integer(1)`\cr
 #'   The minimum amount of work to do on each thread. Default is `1`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `verbose` :: `logical(1)`\cr
-#'   Should details be printed? Initialzed to `FALSE`. For details see [uwot::umap2()].
+#'   Should details be printed? Initialized to `FALSE`. For details, see [uwot::umap2()].
 #' * `batch` :: `logical(1)`\cr
 #'   If `TRUE`, then embedding coordinates are updated at the end of each epoch rather
-#'   than during the epoch. Default is `FALSE`. For details see [uwot::umap2()].
+#'   than during the epoch. Default is `FALSE`. For details, see [uwot::umap2()].
 #' * `opt_args` :: named `list()`\cr
 #'   A list of optimizer parameters, used when `batch = TRUE`. Default is `NULL`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `epoch_callback` :: `function`\cr
 #'   A function which will be invoked at the end of every epoch. Default is `NULL`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `pca_method` :: `character(1)`\cr
 #'   Method to carry out any PCA dimensionality reduction when the `pca` is specified.
-#'   Default is `NULL`. For details see [uwot::umap2()].
+#'   Default is `NULL`. For details, see [uwot::umap2()].
 #' * `binary_edge_weights` :: `logical(1)`\cr
-#'   If TRUE then edge weights in the input graph are treated as binary (0/1) rather than real valued.
-#'   Default is `FALSE`. For details see [uwot::umap2()].
+#'   If `TRUE` then edge weights in the input graph are treated as binary (0/1) rather than real valued.
+#'   Default is `FALSE`. For details, see [uwot::umap2()].
 #' * `dens_scale` :: `numeric(1)`\cr
 #'   A scaling factor to apply to the density of the input data. Default is `NULL`.
-#'   For details see [uwot::umap2()].
+#'   For details, see [uwot::umap2()].
 #' * `seed` :: `integer(1)`\cr
 #'   Integer seed to use to initialize the random number generator state.
-#'   Default is `NULL`. For details see [uwot::umap2()].
+#'   Default is `NULL`. For details, see [uwot::umap2()].
 #' * `nn_args` :: named `list()`\cr
 #'   A list containing additional arguments to pass to the nearest neighbor method.
-#'   Default is `NULL`. For details see [uwot::umap2()].
+#'   Default is `NULL`. For details, see [uwot::umap2()].
 #'
 #' @section Internals:
-#' Uses the [`umap()`][uwot::umap] function.
+#' Uses the [`umap()`][uwot::umap2] function.
 #'
 #' @section Methods:
 #' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`].
@@ -231,11 +239,15 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         n_neighbors = p_int(2L, 100L, default = 15L, tags = c("train", "umap")),
         n_components = p_int(1L, 100L, default = 2L, tags = c("train", "umap")),
         metric = p_fct(
-          c("euclidean", "cosine", "manhattan", "hamming", "correlation", "categorical"),
+          levels = c(
+            "euclidean", "cosine", "manhattan", "hamming", "correlation", "categorical",
+            "braycurtis", "canberra", "chebyshev", "dice", "hamming", "hellinger", "jaccard",
+            "jensenshannon", "kulsinski", "rogerstanimoto", "russellrao", "sokalmichener",
+            "sokalsneath", "spearmanr", "symmetrickl", "tsss", "yule"
+          ),
           default = "euclidean",
-          tags = c("train", "umap"),
-          depends = quote(nn_method == "hnsw")
-        ),
+          tags = c("train", "umap")
+        ),  # why not all?
         n_epochs = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         learning_rate = p_dbl(0, default = 1, tags = c("train", "umap")),
         scale = p_lgl(default = FALSE, special_vals = list("none", "Z", "maxabs", "range", "colrange", NULL), tags = c("train", "umap")),
@@ -270,7 +282,7 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         search_k = p_int(tags = c("train", "umap")),
         approx_pow = p_lgl(default = FALSE, tags = c("train", "umap")),
         y = p_uty(default = NULL, tags = c("train", "umap")),
-        target_n_neighbors = p_int(tags = c("train", "umap")),
+        target_n_neighbors = p_int(tags = c("train", "umap")), # default = n_neighbors
         target_metric = p_fct(c("euclidean", "cosine", "correlation"), default = "euclidean", tags = c("train", "umap")),
         target_weight = p_dbl(0, 1, default = 0.5, tags = c("train", "umap")),
         pca = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
@@ -283,7 +295,11 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         verbose = p_lgl(default = TRUE, tags = c("train", "umap")),
         batch = p_lgl(default = FALSE, tags = c("train", "umap")),
         opt_args = p_uty(default = NULL, tags = c("train", "umap"), custom_check = crate(function(x) check_list(x, null.ok = TRUE))),
-        epoch_callback = p_uty(default = NULL, tags = c("train", "umap"), custom_check = check_function_or_null),
+        epoch_callback = p_uty(
+          default = NULL,
+          tags = c("train", "umap"),
+          custom_check = crate(function(x) check_function(x, args = c("epochs", "n_epochs", "coords"), null.ok = TRUE))
+        ),
         pca_method = p_fct(c("irlba", "rsvd", "bigstatsr", "svd", "auto"), default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         binary_edge_weights = p_lgl(default = FALSE, tags = c("train", "umap")),
         dens_scale = p_dbl(0, 1, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
@@ -299,7 +315,6 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
     .train_dt = function(dt, levels, target) {
       params = insert_named(self$param_set$get_values(tags = "umap"), list(ret_model = TRUE))
       umap = invoke(uwot::umap2, dt, .args = params)
-      browser()
       self$state = umap
       umap$embedding
     },

From 8850d83af523a93d0a84c7f526e6ab1f83db0656 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 30 Jul 2024 20:24:21 +0200
Subject: [PATCH 12/36] fixed merge

---
 R/PipeOpUMAP.R | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index b3572eca6..947814fd3 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -247,11 +247,7 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
           ),
           default = "euclidean",
           tags = c("train", "umap")
-<<<<<<< HEAD
-        ),  # why not all?
-=======
         ),
->>>>>>> 56e43fe2fcae286c88e13d0be596cda22849adfb
         n_epochs = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         learning_rate = p_dbl(0, default = 1, tags = c("train", "umap")),
         scale = p_lgl(default = FALSE, special_vals = list("none", "Z", "maxabs", "range", "colrange", NULL), tags = c("train", "umap")),

From 18000b30316bfc039701c89b63f1c5dc10bf6dc5 Mon Sep 17 00:00:00 2001
From: Maximilian Muecke <muecke.maximilian@gmail.com>
Date: Tue, 30 Jul 2024 20:38:13 +0200
Subject: [PATCH 13/36] docs: redocument

---
 R/PipeOpUMAP.R          |   2 +-
 man/mlr_pipeops_umap.Rd | 136 +++++++++++++++++++++-------------------
 2 files changed, 73 insertions(+), 65 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 947814fd3..40afea686 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -210,7 +210,7 @@
 #'   Default is `NULL`. For details, see [uwot::umap2()].
 #'
 #' @section Internals:
-#' Uses the [`umap()`][uwot::umap2] function.
+#' Uses the [umap2()][uwot::umap2] function.
 #'
 #' @section Methods:
 #' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`].
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index 134824f43..1bf05c50a 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -9,7 +9,7 @@
 }
 \description{
 Carry out dimensionality reduction of a dataset using the Uniform Manifold Approximation and Projection (UMAP).
-See \code{\link[uwot:umap2]{uwot::umap2()}} for details.
+See \code{\link[uwot:umap2]{uwot::umap2()}} for details.
 }
 \section{Construction}{
 
@@ -33,59 +33,66 @@ The output is the input \code{\link[mlr3:Task]{Task}} with all affected numeric
 
 \section{State}{
 
-The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}, as well as the elements of the class \link[uwot:umap2]{uwot::umap2}.
+The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}, as well as the elements of the list
+returned from \link[uwot:umap2]{uwot::umap2}.
 These are in particular:
 \itemize{
 \item \code{embedding} :: \code{matrix}\cr
-Blah
+Matrix of embedded coordinates.
 \item \code{scale_info} :: \code{any}\cr
 Blah
 \item \code{search_k} :: \code{numeric(1)}\cr
-Blah
+Number of nodes searched during the neighbor retrieval. Only used if the \code{nn_method} is \code{"annoy"}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{local_connectivity} :: \code{numeric(1)}\cr
-Blah
+Used local connectivity – i.e. the number of nearest neighbors that should be
+assumed to be connected at a local level. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_epochs} :: \code{numeric(1)}\cr
-Blah
+Number of epochs used during the optimization of the embedded coordinates. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{alpha} :: \code{numeric(1)}\cr
-Blah
+Initial learning rate. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{negative_sample_rate} :: \code{numeric(1)}\cr
-Blah
+The number of negative edge/1-simplex samples used per positive edge/1-simplex sample
+in optimizing the low dimensional embedding. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{method} :: \code{character(1)}\cr
 Blah
 \item \code{a} :: named \code{numeric(1)}\cr
-Blah
+More specific parameters controlling the embedding. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{b} :: named \code{numeric(1)}\cr
-Blah
+More specific parameters controlling the embedding. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{gamma} :: \code{numeric(1)}\cr
 Blah
 \item \code{approx_pow} :: \code{logical(1)}\cr
-Blah
+If \code{TRUE}, use an approximation to the power function in the UMAP gradient. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{metric} :: named \code{list()}\cr
-Blah
+Type of distance metric used to find nearest neighbors. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{norig_col} :: \code{integer(1)}\cr
-Blah
+Number of original columns.
 \item \code{pcg_rand} :: \code{logical(1)}\cr
-Blah
+\code{TRUE}, if the PCG random number generator (O'Neill, 2014) was used during optimization.
+Otherwise, Tausworthe "taus88" generator was used. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{batch} :: \code{logical(1)}\cr
-Blah
+\code{TRUE}, if embedding coordinates were updated at the end of each epoch rather
+than during the epoch. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{opt_args} :: named \code{list()}\cr
-Blah
+Optimizer parameters, used when \code{batch = TRUE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{num_precomputed_nns} :: \code{numeric(1)}\cr
 Blah
 \item \code{min_dist} :: \code{numeric(1)}\cr
-Blah
+The effective minimum distance between embedded points. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{spread} :: \code{numeric(1)}\cr
-Blah
+The effective scale of embedded points. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{binary_edge_weights} :: \code{logical(1)}\cr
-Blah
+If \code{TRUE} then edge weights in the input graph were treated as binary (0/1) rather than real valued.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{seed} :: \code{integer(1)}\cr
 Blah
 \item \code{nn_method} :: \code{any}\cr
-Blah
+Method for finding nearest neighbors. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{nn_args} :: \code{list()}\cr
 Blah
 \item \code{n_neighbors} :: \code{numeric(1)}\cr
-Blah
+The size of the neighborhood used for manifold approximation. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{nn_index} :: named \code{list()}\cr
 Blah
 \item \code{pca_models} :: \code{list()}\cr
@@ -99,129 +106,130 @@ The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}
 \itemize{
 \item \code{n_neighbors} :: \code{integer(1)}\cr
 The size of the neighborhood used for manifold approximation. Default is \code{15}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_components} :: \code{integer(1)}\cr
-The dimension of the space to embed into. Default is \code{2}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+The dimension of the space to embed into. Default is \code{2}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{metric} :: \code{character(1)}\cr
 Type of distance metric to use to find nearest neighbors. Default is \code{"euclidean"}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_epochs} :: \code{integer(1)}\cr
 Number of epochs to use during the optimization of the embedded coordinates. Default is \code{NULL}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{learning_rate} :: \code{numeric(1)}\cr
 Initial learning rate used in optimization of the coordinates. Default is \code{1}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{init} :: \code{character(1)} | \code{matrix}\cr
 Type of initialization for the coordinates. Default is \code{"spectral"}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{init_sdev} :: \code{character(1)} | \code{numeric(1)}\cr
 Scales each dimension of the initialized coordinates to this standard deviation.
-Default is \code{"range"}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Default is \code{"range"}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{spread} :: \code{numeric(1)}\cr
-The effective scale of embedded points. Default is \code{1}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+The effective scale of embedded points. Default is \code{1}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{min_dist} :: \code{numeric(1)}\cr
 The effective minimum distance between embedded points. Default is \code{0.01}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{set_op_mix_ratio} :: \code{numeric(1)}\cr
 Interpolate between (fuzzy) union and intersection as the set operation used to
 combine local fuzzy simplicial sets to obtain a global fuzzy simplicial sets. Default is \code{1}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{local_connectivity} :: \code{numeric(1)}\cr
 The local connectivity required – i.e. the number of nearest neighbors that should be
-assumed to be connected at a local level. Default is \code{1}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+assumed to be connected at a local level. Default is \code{1}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{bandwidth} :: \code{numeric(1)}\cr
 The effective bandwidth of the kernel if we view the algorithm as similar to Laplacian Eigenmaps.
-Default is \code{1}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Default is \code{1}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{repulsion_strength} :: \code{numeric(1)}\cr
 Weighting applied to negative samples in low dimensional embedding optimization.
-Default is \code{1}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Default is \code{1}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{negative_sample_rate} :: \code{numeric(1)}\cr
 The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample
-in optimizing the low dimensional embedding. Default is \code{5}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+in optimizing the low dimensional embedding. Default is \code{5}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{a} :: \code{any}\cr
-More specific parameters controlling the embedding. Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+More specific parameters controlling the embedding. Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{b} :: \code{any}\cr
-More specific parameters controlling the embedding. Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+More specific parameters controlling the embedding. Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{nn_method} :: \code{character(1)} | named \code{list()} | matrix\cr
-Method for finding nearest neighbors. Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Method for finding nearest neighbors. Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_trees} :: \code{integer(1)}\cr
 Number of trees to build when constructing the nearest neighbor index. Default is \code{50}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{search_k} :: \code{integer(1)}\cr
-Number of nodes to search during the neighbor retrieval. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Number of nodes to search during the neighbor retrieval. Only used if the \code{nn_method} is \code{"annoy"}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{approx_pow} :: \code{logical(1)}\cr
 If \code{TRUE}, use an approximation to the power function in the UMAP gradient. Default is \code{FALSE}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{y} :: \code{any}\cr
 Optional target data for supervised dimension reduction. Default is \code{NULL}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{target_n_neighbors} :: \code{integer(1)}\cr
-Number of nearest neighbors to use to construct the target simplicial set. Default is \code{NULL}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Number of nearest neighbors to use to construct the target simplicial set. Default is \code{n_neighbors}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{target_metric} :: \code{character(1)}\cr
 The metric used to measure distance for \code{y} if using supervised dimension reduction.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{target_weight} :: \code{numeric(1)}\cr
 Weighting factor between data topology and target topology. Default is \code{0.5}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca} :: \code{integer(1)}\cr
 Redude data to this number of columns using PCA. Default is \code{NULL}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca_center} :: \code{logical(1)}\cr
 If \code{TRUE}, center the columns of X before carrying out PCA. Default is \code{TRUE}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca_rand} :: \code{logical(1)}\cr
 If \code{TRUE}, use the PCG random number generator (O'Neill, 2014) during optimization.
 Otherwise, use the faster (but probably less statistically good) Tausworthe "taus88" generator.
-Default is \code{TRUE}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Default is \code{TRUE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{fast_sgd} :: \code{logical(1)}\cr
 If \code{TRUE}, then the following combination of parameters is set:
 \itemize{
 \item \code{pcg_rand = TRUE}
 \item \code{n_sgd_threads = "auto"}
 \item \code{approx_pow = TRUE}
-Default is \code{FALSE}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Default is \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 }
 \item \code{n_threads} :: \code{integer(1)}\cr
-Number of threads to use. Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Number of threads to use. Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_sgd_threads} :: \code{integer(1)}\cr
 Number of threads to use during stochastic gradient descent. Default is \code{0}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{grain_size} :: \code{integer(1)}\cr
 The minimum amount of work to do on each thread. Default is \code{1}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{verbose} :: \code{logical(1)}\cr
-Should details be printed? Initialzed to \code{FALSE}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Should details be printed? Initialized to \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{batch} :: \code{logical(1)}\cr
 If \code{TRUE}, then embedding coordinates are updated at the end of each epoch rather
-than during the epoch. Default is \code{FALSE}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+than during the epoch. Default is \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{opt_args} :: named \code{list()}\cr
 A list of optimizer parameters, used when \code{batch = TRUE}. Default is \code{NULL}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{epoch_callback} :: \code{function}\cr
 A function which will be invoked at the end of every epoch. Default is \code{NULL}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca_method} :: \code{character(1)}\cr
 Method to carry out any PCA dimensionality reduction when the \code{pca} is specified.
-Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{binary_edge_weights} :: \code{logical(1)}\cr
-If TRUE then edge weights in the input graph are treated as binary (0/1) rather than real valued.
-Default is \code{FALSE}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+If \code{TRUE} then edge weights in the input graph are treated as binary (0/1) rather than real valued.
+Default is \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{dens_scale} :: \code{numeric(1)}\cr
 A scaling factor to apply to the density of the input data. Default is \code{NULL}.
-For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{seed} :: \code{integer(1)}\cr
 Integer seed to use to initialize the random number generator state.
-Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{nn_args} :: named \code{list()}\cr
 A list containing additional arguments to pass to the nearest neighbor method.
-Default is \code{NULL}. For details see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 }
 }
 
 \section{Internals}{
 
-Uses the \code{\link[uwot:umap]{umap()}} function.
+Uses the \link[uwot:umap2]{umap2()} function.
 }
 
 \section{Methods}{

From 341fd1ea06feb60cbab598e4040d8a754c92c7d5 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 30 Jul 2024 21:04:02 +0200
Subject: [PATCH 14/36] docs: added seed state + feat: rm double metric

---
 R/PipeOpUMAP.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 947814fd3..73c5187cc 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -45,7 +45,7 @@
 #'   The number of negative edge/1-simplex samples used per positive edge/1-simplex sample
 #'   in optimizing the low dimensional embedding. For details, see [uwot::umap2()].
 #' * `method` :: `character(1)`\cr
-#'   Blah
+#'   General method used for dimensionality reduction; always `"umap"` for this PipeOp.
 #' * `a` :: named `numeric(1)`\cr
 #'   More specific parameters controlling the embedding. For details, see [uwot::umap2()].
 #' * `b` :: named `numeric(1)`\cr
@@ -76,7 +76,7 @@
 #'   If `TRUE` then edge weights in the input graph were treated as binary (0/1) rather than real valued.
 #'   For details, see [uwot::umap2()].
 #' * `seed` :: `integer(1)`\cr
-#'   Blah
+#'   Integer seed to use to initialize the random number generator state. For details, see [uwot::umap2()].
 #' * `nn_method` :: `any`\cr
 #'   Method for finding nearest neighbors. For details, see [uwot::umap2()].
 #' * `nn_args` :: `list()`\cr
@@ -241,7 +241,7 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         metric = p_fct(
           levels = c(
             "euclidean", "cosine", "manhattan", "hamming", "correlation", "categorical",
-            "braycurtis", "canberra", "chebyshev", "dice", "hamming", "hellinger", "jaccard",
+            "braycurtis", "canberra", "chebyshev", "dice", "hellinger", "jaccard",
             "jensenshannon", "kulsinski", "rogerstanimoto", "russellrao", "sokalmichener",
             "sokalsneath", "spearmanr", "symmetrickl", "tsss", "yule"
           ),
@@ -282,7 +282,7 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         search_k = p_int(tags = c("train", "umap")),
         approx_pow = p_lgl(default = FALSE, tags = c("train", "umap")),
         y = p_uty(default = NULL, tags = c("train", "umap")),
-        target_n_neighbors = p_int(tags = c("train", "umap")), # default = n_neighbors
+        target_n_neighbors = p_int(tags = c("train", "umap")),
         target_metric = p_fct(c("euclidean", "cosine", "correlation"), default = "euclidean", tags = c("train", "umap")),
         target_weight = p_dbl(0, 1, default = 0.5, tags = c("train", "umap")),
         pca = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),

From b6b14372748e372afbe7bd81e15e4a3af2d74981 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 30 Jul 2024 21:52:56 +0200
Subject: [PATCH 15/36] docs: remaining state params + document

---
 R/PipeOpUMAP.R          | 15 ++++++++-------
 man/mlr_pipeops_nmf.Rd  |  2 +-
 man/mlr_pipeops_umap.Rd | 19 ++++++++++---------
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index d72e1cea9..7cb8fa69c 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -29,8 +29,8 @@
 #' These are in particular:
 #' * `embedding` :: `matrix`\cr
 #'   Matrix of embedded coordinates.
-#' * `scale_info` :: `any`\cr
-#'   Blah
+#' * `scale_info` :: named `list()`\cr
+#'   If `scale` is `TRUE`, this gives the scaling attributes (`center`, `scale`, `nzvcols`) of the scaled data.
 #' * `search_k` :: `numeric(1)`\cr
 #'   Number of nodes searched during the neighbor retrieval. Only used if the `nn_method` is `"annoy"`.
 #'   For details, see [uwot::umap2()].
@@ -51,7 +51,8 @@
 #' * `b` :: named `numeric(1)`\cr
 #'   More specific parameters controlling the embedding. For details, see [uwot::umap2()].
 #' * `gamma` :: `numeric(1)`\cr
-#'   Blah
+#'   Repulsion strength. Weighting applied to negative samples in low dimensional embedding optimization.
+#'   For details, see [uwot::umap2()].
 #' * `approx_pow` :: `logical(1)`\cr
 #'   If `TRUE`, use an approximation to the power function in the UMAP gradient. For details, see [uwot::umap2()].
 #' * `metric` :: named `list()`\cr
@@ -67,7 +68,7 @@
 #' * `opt_args` :: named `list()`\cr
 #'   Optimizer parameters, used when `batch = TRUE`. For details, see [uwot::umap2()].
 #' * `num_precomputed_nns` :: `numeric(1)`\cr
-#'   Blah
+#'   Number of precomputed nearest neighbors, via `nn_method`.
 #' * `min_dist` :: `numeric(1)`\cr
 #'   The effective minimum distance between embedded points. For details, see [uwot::umap2()].
 #' * `spread` :: `numeric(1)`\cr
@@ -80,13 +81,13 @@
 #' * `nn_method` :: `any`\cr
 #'   Method for finding nearest neighbors. For details, see [uwot::umap2()].
 #' * `nn_args` :: `list()`\cr
-#'   Blah
+#'   A list containing additional arguments to pass to the nearest neighbor method. For details, see [uwot::umap2()].
 #' * `n_neighbors` :: `numeric(1)`\cr
 #'   The size of the neighborhood used for manifold approximation. For details, see [uwot::umap2()].
 #' * `nn_index` :: named `list()`\cr
-#'   Blah
+#'   Nearest neighbor index that can be used for transformation of new data points.
 #' * `pca_models` :: `list()`\cr
-#'   Blah
+#'   PCA models used for initialization, if `pca` is specified. For details, see [uwot::umap2()].
 #'
 #' @section Parameters:
 #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as:
diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd
index 1de3f5083..6e74a4313 100644
--- a/man/mlr_pipeops_nmf.Rd
+++ b/man/mlr_pipeops_nmf.Rd
@@ -96,7 +96,7 @@ See \code{\link[NMF:nmf]{nmf()}}.
 
 \section{Internals}{
 
-Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis-coef-methods]{basis()}}, \code{\link[NMF:basis-coef-methods]{coef()}} and
+Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis]{basis()}}, \code{\link[NMF:coef]{coef()}} and
 \code{\link[MASS:ginv]{ginv()}}.
 }
 
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index 1bf05c50a..c4c4c12dd 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -39,8 +39,8 @@ These are in particular:
 \itemize{
 \item \code{embedding} :: \code{matrix}\cr
 Matrix of embedded coordinates.
-\item \code{scale_info} :: \code{any}\cr
-Blah
+\item \code{scale_info} :: named \code{list()}\cr
+If \code{scale} is \code{TRUE}, this gives the scaling attributes (\code{center}, \code{scale}, \code{nzvcols}) of the scaled data.
 \item \code{search_k} :: \code{numeric(1)}\cr
 Number of nodes searched during the neighbor retrieval. Only used if the \code{nn_method} is \code{"annoy"}.
 For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
@@ -55,13 +55,14 @@ Initial learning rate. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 The number of negative edge/1-simplex samples used per positive edge/1-simplex sample
 in optimizing the low dimensional embedding. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{method} :: \code{character(1)}\cr
-Blah
+General method used for dimensionality reduction; always \code{"umap"} for this PipeOp.
 \item \code{a} :: named \code{numeric(1)}\cr
 More specific parameters controlling the embedding. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{b} :: named \code{numeric(1)}\cr
 More specific parameters controlling the embedding. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{gamma} :: \code{numeric(1)}\cr
-Blah
+Repulsion strength. Weighting applied to negative samples in low dimensional embedding optimization.
+For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{approx_pow} :: \code{logical(1)}\cr
 If \code{TRUE}, use an approximation to the power function in the UMAP gradient. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{metric} :: named \code{list()}\cr
@@ -77,7 +78,7 @@ than during the epoch. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{opt_args} :: named \code{list()}\cr
 Optimizer parameters, used when \code{batch = TRUE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{num_precomputed_nns} :: \code{numeric(1)}\cr
-Blah
+Number of precomputed nearest neighbors, via \code{nn_method}.
 \item \code{min_dist} :: \code{numeric(1)}\cr
 The effective minimum distance between embedded points. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{spread} :: \code{numeric(1)}\cr
@@ -86,17 +87,17 @@ The effective scale of embedded points. For details, see \code{\link[uwot:umap2]
 If \code{TRUE} then edge weights in the input graph were treated as binary (0/1) rather than real valued.
 For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{seed} :: \code{integer(1)}\cr
-Blah
+Integer seed to use to initialize the random number generator state. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{nn_method} :: \code{any}\cr
 Method for finding nearest neighbors. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{nn_args} :: \code{list()}\cr
-Blah
+A list containing additional arguments to pass to the nearest neighbor method. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_neighbors} :: \code{numeric(1)}\cr
 The size of the neighborhood used for manifold approximation. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{nn_index} :: named \code{list()}\cr
-Blah
+Nearest neighbor index that can be used for transformation of new data points.
 \item \code{pca_models} :: \code{list()}\cr
-Blah
+PCA models used for initialization, if \code{pca} is specified. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 }
 }
 

From 28adeed326c1171838f4935282a88fde895049fd Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Fri, 2 Aug 2024 15:07:18 +0200
Subject: [PATCH 16/36] feat: predict takes computing params that are not taken
 from model by umap_transform by default

---
 R/PipeOpUMAP.R | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 7cb8fa69c..6a2ecd2f1 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -290,8 +290,8 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         pca_center = p_lgl(default = TRUE, tags = c("train", "umap")),
         pca_rand = p_lgl(default = TRUE, tags = c("train", "umap")),
         fast_sgd = p_lgl(default = FALSE, tags = c("train", "umap")),
-        n_threads = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
-        n_sgd_threads = p_int(0L, default = 0L, special_vals = list("auto"), tags = c("train", "umap")),
+        n_threads = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "predict", "umap")),
+        n_sgd_threads = p_int(0L, default = 0L, special_vals = list("auto"), tags = c("train", "predict", "umap")),
         grain_size = p_int(1L, default = 1L, tags = c("train", "umap")),
         verbose = p_lgl(default = TRUE, tags = c("train", "umap")),
         batch = p_lgl(default = FALSE, tags = c("train", "umap")),
@@ -314,14 +314,15 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
   ),
   private = list(
     .train_dt = function(dt, levels, target) {
-      params = insert_named(self$param_set$get_values(tags = "umap"), list(ret_model = TRUE))
+      params = insert_named(self$param_set$get_values(tags = c("umap", "train")), list(ret_model = TRUE))
       umap = invoke(uwot::umap2, dt, .args = params)
       self$state = umap
       umap$embedding
     },
 
     .predict_dt = function(dt, levels) {
-      invoke(uwot::umap_transform, dt, self$state)
+      params = self$param_set$get_values(tags = c("umap", "predict"))
+      invoke(uwot::umap_transform, dt, self$state, .args = params)
     }
   )
 )

From 114e33cd6093fa02d8e626bc46351858541e19d9 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Fri, 2 Aug 2024 15:10:20 +0200
Subject: [PATCH 17/36] feat: target metrics same as metrics

---
 R/PipeOpUMAP.R | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 6a2ecd2f1..bc57005eb 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -284,7 +284,16 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         approx_pow = p_lgl(default = FALSE, tags = c("train", "umap")),
         y = p_uty(default = NULL, tags = c("train", "umap")),
         target_n_neighbors = p_int(tags = c("train", "umap")),
-        target_metric = p_fct(c("euclidean", "cosine", "correlation"), default = "euclidean", tags = c("train", "umap")),
+        target_metric =  p_fct(
+          levels = c(
+            "euclidean", "cosine", "manhattan", "hamming", "correlation", "categorical",
+            "braycurtis", "canberra", "chebyshev", "dice", "hellinger", "jaccard",
+            "jensenshannon", "kulsinski", "rogerstanimoto", "russellrao", "sokalmichener",
+            "sokalsneath", "spearmanr", "symmetrickl", "tsss", "yule"
+          ),
+          default = "euclidean",
+          tags = c("train", "umap")
+        ),
         target_weight = p_dbl(0, 1, default = 0.5, tags = c("train", "umap")),
         pca = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         pca_center = p_lgl(default = TRUE, tags = c("train", "umap")),

From 5e60f6f57e554427f5a7f403bb29e10328cb80e4 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Fri, 2 Aug 2024 15:42:04 +0200
Subject: [PATCH 18/36] feat: custom check for param

---
 R/PipeOpUMAP.R | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index bc57005eb..3932b58e3 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -282,7 +282,17 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         n_trees = p_int(10L, 100L, default = 50L, tags = c("train", "umap")),
         search_k = p_int(tags = c("train", "umap")),
         approx_pow = p_lgl(default = FALSE, tags = c("train", "umap")),
-        y = p_uty(default = NULL, tags = c("train", "umap")),
+        y = p_uty(
+          default = NULL,
+          tags = c("train", "umap"),
+          custom_check = crate(function(x) {
+            check_atomic_vector(x) %check||%
+              check_matrix(x) %check||%
+              check_data_frame(x) %check||%
+              check_list(x) %check||%
+              check_null(x)
+          })
+        ),
         target_n_neighbors = p_int(tags = c("train", "umap")),
         target_metric =  p_fct(
           levels = c(

From 5f869876cda1c10b309994f55565c2eeea895cb1 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 6 Aug 2024 15:52:28 +0200
Subject: [PATCH 19/36] feat: removed param options that are incompatible with
 predict + test stub

---
 R/PipeOpUMAP.R                    | 10 +++-------
 tests/testthat/test_pipeop_umap.R | 14 +++++++++++++-
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 3932b58e3..331adbfe1 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -6,7 +6,7 @@
 #'
 #' @description
 #' Carry out dimensionality reduction of a dataset using the Uniform Manifold Approximation and Projection (UMAP).
-#' See [uwot::umap2()] For details,.
+#' See [uwot::umap2()] for details.
 #'
 #' @section Construction:
 #' ```
@@ -241,7 +241,7 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         n_components = p_int(1L, 100L, default = 2L, tags = c("train", "umap")),
         metric = p_fct(
           levels = c(
-            "euclidean", "cosine", "manhattan", "hamming", "correlation", "categorical",
+            "euclidean", "cosine", "manhattan", "hamming", "correlation",
             "braycurtis", "canberra", "chebyshev", "dice", "hellinger", "jaccard",
             "jensenshannon", "kulsinski", "rogerstanimoto", "russellrao", "sokalmichener",
             "sokalsneath", "spearmanr", "symmetrickl", "tsss", "yule"
@@ -273,11 +273,7 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         nn_method = p_uty(
           default = NULL,
           tags = c("train", "umap"),
-          custom_check = crate(function(x) {
-            check_choice(x, c("fnn", "annoy", "hnsw", "nndescent"), null.ok = TRUE) %check||%
-              check_list(x, types = "matrix", len = 2L, names = "idx", "dist") %check||%
-              check_class(x, "dgCMatrix")
-          })
+          custom_check = crate(function(x) check_choice(x, c("annoy", "hnsw", "nndescent"), null.ok = TRUE))
         ),
         n_trees = p_int(10L, 100L, default = 50L, tags = c("train", "umap")),
         search_k = p_int(tags = c("train", "umap")),
diff --git a/tests/testthat/test_pipeop_umap.R b/tests/testthat/test_pipeop_umap.R
index eef01e201..fe770c347 100644
--- a/tests/testthat/test_pipeop_umap.R
+++ b/tests/testthat/test_pipeop_umap.R
@@ -3,5 +3,17 @@ context("PipeOpUMAP")
 test_that("PipeOpUMAP - basic properties", {
   skip_if_not_installed("uwot")
   op = PipeOpUMAP$new()
-  expect_pipeop(op)
+  task = mlr_tasks$get("iris")
+  expect_datapreproc_pipeop_class(op, task = task)
+})
+
+test_that("PipeOpUMAP - Compare to uwot::umap2", {
+  skip_if_not_installed("uwot")
+  op = PipeOpUMAP$new()
+  task = mlr_tasks$get("iris")
+
+  # Default parameters
+
+  # Some changed parameters
+
 })

From b8fa65f60ecf9717815cfeab56e6e4f900a60288 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 6 Aug 2024 16:00:12 +0200
Subject: [PATCH 20/36] small docs change and change to target_metric for
 compatibility with predict

---
 R/PipeOpUMAP.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 331adbfe1..9857118a0 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -136,8 +136,9 @@
 #'   More specific parameters controlling the embedding. Default is `NULL`. For details, see [uwot::umap2()].
 #' * `b` :: `any`\cr
 #'   More specific parameters controlling the embedding. Default is `NULL`. For details, see [uwot::umap2()].
-#' * `nn_method` :: `character(1)` | named `list()` | matrix\cr
-#'   Method for finding nearest neighbors. Default is `NULL`. For details, see [uwot::umap2()].
+#' * `nn_method` :: `character(1)`\cr
+#'   Method for finding nearest neighbors. Note that only values compatible with [uwot::umap_transform()] are allowed.
+#'   Default is `NULL`. For details, see [uwot::umap2()].
 #' * `n_trees` :: `integer(1)`\cr
 #'   Number of trees to build when constructing the nearest neighbor index. Default is `50`.
 #'   For details, see [uwot::umap2()].
@@ -292,7 +293,7 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         target_n_neighbors = p_int(tags = c("train", "umap")),
         target_metric =  p_fct(
           levels = c(
-            "euclidean", "cosine", "manhattan", "hamming", "correlation", "categorical",
+            "euclidean", "cosine", "manhattan", "hamming", "correlation",
             "braycurtis", "canberra", "chebyshev", "dice", "hellinger", "jaccard",
             "jensenshannon", "kulsinski", "rogerstanimoto", "russellrao", "sokalmichener",
             "sokalsneath", "spearmanr", "symmetrickl", "tsss", "yule"

From 7881990870d39b6aaaa5b3dd26197f49bf046431 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 6 Aug 2024 20:14:03 +0200
Subject: [PATCH 21/36] docs: param scale + small corr for scale special vals

---
 R/PipeOpUMAP.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 9857118a0..6d0836e06 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -105,6 +105,8 @@
 #' * `learning_rate` :: `numeric(1)`\cr
 #'   Initial learning rate used in optimization of the coordinates. Default is `1`.
 #'   For details, see [uwot::umap2()].
+#' * `scale` :: `logical(1)` / `character(1)`\cr
+#'   Scaling to apply to the data. If `TRUE`, data is standardized. Default is `FALSE`. For details, see [uwot::umap2()].
 #' * `init` :: `character(1)` | `matrix`\cr
 #'   Type of initialization for the coordinates. Default is `"spectral"`.
 #'   For details, see [uwot::umap2()].
@@ -252,7 +254,7 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         ),
         n_epochs = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         learning_rate = p_dbl(0, default = 1, tags = c("train", "umap")),
-        scale = p_lgl(default = FALSE, special_vals = list("none", "Z", "maxabs", "range", "colrange", NULL), tags = c("train", "umap")),
+        scale = p_lgl(default = FALSE, special_vals = list("none", "Z", "scale", "maxabs", "range", "colrange", NULL), tags = c("train", "umap")),
         init = p_uty(
           default = "spectral",
           tags = c("train", "umap"),

From 79c7dbc1e98bb7a1840220dd1fa354c1ab14cc07 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 6 Aug 2024 20:57:04 +0200
Subject: [PATCH 22/36] added tests

---
 tests/testthat/test_pipeop_umap.R | 73 ++++++++++++++++++++++++++++---
 1 file changed, 67 insertions(+), 6 deletions(-)

diff --git a/tests/testthat/test_pipeop_umap.R b/tests/testthat/test_pipeop_umap.R
index fe770c347..0a389f88b 100644
--- a/tests/testthat/test_pipeop_umap.R
+++ b/tests/testthat/test_pipeop_umap.R
@@ -3,17 +3,78 @@ context("PipeOpUMAP")
 test_that("PipeOpUMAP - basic properties", {
   skip_if_not_installed("uwot")
   op = PipeOpUMAP$new()
-  task = mlr_tasks$get("iris")
-  expect_datapreproc_pipeop_class(op, task = task)
+  task = mlr_tasks$get("iris")$filter(1:30)
+
+  expect_pipeop(op)
+
+  expect_task(op$train(list(task))[[1]])
+  expect_task(op$predict(list(task))[[1]])
+
 })
 
-test_that("PipeOpUMAP - Compare to uwot::umap2", {
+test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Default Params", {
   skip_if_not_installed("uwot")
+  task = mlr_tasks$get("iris")$filter(1:30)
+
   op = PipeOpUMAP$new()
-  task = mlr_tasks$get("iris")
+  pv = list(seed = 1234L)
+  op$param_set$set_values(.values = pv)
+
+  train_out = train_pipeop(op, list(task))[[1L]]
+  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], ret_model = TRUE, .args = pv)
+
+  state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b",
+                  "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
+                  "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
+  expect_true(all(state_names %in% names(op$state)))
+  state_names_wo_pointers = setdiff(state_names, "nn_index") #  since pointers in element 1 will not be equal
+  expect_identical(op$state[state_names_wo_pointers], umap_out[state_names_wo_pointers])
+  expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
+
+  predict_out = predict_pipeop(op, list(task))[[1L]]
+  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out)
+  expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out))
+
+})
+
+
+test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed Params", {
+  skip_if_not_installed("uwot")
+  task = mlr_tasks$get("iris")$filter(1:30)
+
+  op = PipeOpUMAP$new()
+  pv = list(
+    seed = 1234L,
+    nn_method = "annoy",
+    n_neighbors = 10L,
+    metric = "correlation",
+    n_epochs = 100L,
+    learning_rate = 0.5,
+    scale = FALSE,
+    init = "pca",
+    init_sdev = 1e-4,
+    set_op_mix_ratio = 0.5,
+    local_connectivity = 1.1,
+    bandwidth = 0.9,
+    repulsion_strength = 1.1,
+    negative_sample_rate = 6,
+    y = task$data()[, 1]
+  )
+  op$param_set$set_values(.values = pv)
+
+  train_out = train_pipeop(op, list(task))[[1L]]
+  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], ret_model = TRUE, .args = pv)
 
-  # Default parameters
+  state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b",
+                  "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
+                  "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
+  expect_true(all(state_names %in% names(op$state)))
+  state_names = setdiff(state_names, "nn_index") #  since pointers in state$nn_index$element1 will not be equal
+  expect_identical(op$state[state_names], umap_out[state_names])
+  expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
 
-  # Some changed parameters
+  predict_out = predict_pipeop(op, list(task))[[1L]]
+  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out)
+  expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out))
 
 })

From 9d29051c688934eab51be5687fa87ea498e58922 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 6 Aug 2024 21:04:32 +0200
Subject: [PATCH 23/36] docs: run document

---
 man/mlr_pipeops_umap.Rd | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index c4c4c12dd..d5140a246 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -9,7 +9,7 @@
 }
 \description{
 Carry out dimensionality reduction of a dataset using the Uniform Manifold Approximation and Projection (UMAP).
-See \code{\link[uwot:umap2]{uwot::umap2()}} For details,.
+See \code{\link[uwot:umap2]{uwot::umap2()}} for details.
 }
 \section{Construction}{
 
@@ -119,6 +119,8 @@ For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{learning_rate} :: \code{numeric(1)}\cr
 Initial learning rate used in optimization of the coordinates. Default is \code{1}.
 For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
+\item \code{scale} :: \code{logical(1)} / \code{character(1)}\cr
+Scaling to apply to the data. If \code{TRUE}, data is standardized. Default is \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{init} :: \code{character(1)} | \code{matrix}\cr
 Type of initialization for the coordinates. Default is \code{"spectral"}.
 For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
@@ -150,8 +152,9 @@ in optimizing the low dimensional embedding. Default is \code{5}. For details, s
 More specific parameters controlling the embedding. Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{b} :: \code{any}\cr
 More specific parameters controlling the embedding. Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
-\item \code{nn_method} :: \code{character(1)} | named \code{list()} | matrix\cr
-Method for finding nearest neighbors. Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
+\item \code{nn_method} :: \code{character(1)}\cr
+Method for finding nearest neighbors. Note that only values compatible with \code{\link[uwot:umap_transform]{uwot::umap_transform()}} are allowed.
+Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{n_trees} :: \code{integer(1)}\cr
 Number of trees to build when constructing the nearest neighbor index. Default is \code{50}.
 For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.

From 8545c79485bab327262575b158811846b66bc7c5 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 6 Aug 2024 21:19:31 +0200
Subject: [PATCH 24/36] updated NEWS.md

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index fc42cae16..4f583d8c2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # mlr3pipelines 0.6.1
 
 * New PipeOp `PipeOpRowApply` / `po("rowapply")`
+* New PipeOp `PipeOpUMAP` / `po("umap")`
 
 # mlr3pipelines 0.6.0
 

From 97a616afc0c0cd60a709ab5425cd8bb74227ed38 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 6 Aug 2024 22:11:19 +0200
Subject: [PATCH 25/36] added packages to initialize

---
 R/PipeOpUMAP.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 6d0836e06..67b6cba16 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -327,7 +327,7 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
       )
       ps$set_values(verbose = FALSE)
 
-      super$initialize(id, param_set = ps, param_vals = param_vals, feature_types = c("numeric", "integer"))
+      super$initialize(id, param_set = ps, param_vals = param_vals, packages = "uwot", feature_types = c("numeric", "integer"))
     }
   ),
   private = list(

From ccdb4ae643e97d3675fab833a27405474103a78f Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 6 Aug 2024 23:08:35 +0200
Subject: [PATCH 26/36] docs: added ref to paper

---
 R/PipeOpUMAP.R          |  2 ++
 R/bibentries.R          | 13 +++++++++++++
 man/mlr_pipeops_umap.Rd |  7 +++++++
 3 files changed, 22 insertions(+)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 67b6cba16..b609dea07 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -231,6 +231,8 @@
 #'
 #' pop$state
 #' \dontshow{ \} }
+#' @references
+#' `r format_bib("mcinnes_2018")`
 #' @family PipeOps
 #' @template seealso_pipeopslist
 #' @include PipeOpTaskPreproc.R
diff --git a/R/bibentries.R b/R/bibentries.R
index de55741d5..a78b6103a 100644
--- a/R/bibentries.R
+++ b/R/bibentries.R
@@ -52,5 +52,18 @@ bibentries = c(
     author    = "Yujun Wu and Dennis D Boos and Leonard A Stefanski",
     title     = "Controlling Variable Selection by the Addition of Pseudovariables",
     journal   = "Journal of the American Statistical Association"
+  ),
+
+  mcinnes_2018 = bibentry("article",
+    doi        = "10.21105/joss.00861",
+    year       = "2018",
+    month      = "9",
+    publisher  = "The Open Journal",
+    volume     = "3",
+    number     = "29",
+    author     = "Leland McInnes and John Healy and James Melville and Lukas Großberger",
+    title      = "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction",
+    journal    = "Journal of Open Source Software"
   )
 )
+
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index d5140a246..d57694cbb 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -254,6 +254,12 @@ pop$train(list(task))[[1]]$data()
 pop$state
 \dontshow{ \} }
 }
+\references{
+McInnes L, Healy J, Melville J, Großberger L (2018).
+\dQuote{UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction.}
+\emph{Journal of Open Source Software}, \bold{3}(29).
+\doi{10.21105/joss.00861}.
+}
 \seealso{
 https://mlr-org.com/pipeops.html
 
@@ -312,6 +318,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_removeconstants}},
 \code{\link{mlr_pipeops_renamecolumns}},
 \code{\link{mlr_pipeops_replicate}},
+\code{\link{mlr_pipeops_rowapply}},
 \code{\link{mlr_pipeops_scale}},
 \code{\link{mlr_pipeops_scalemaxabs}},
 \code{\link{mlr_pipeops_scalerange}},

From 56ba86160873733eaa1392cb9b145c3900af43f8 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 6 Aug 2024 23:19:57 +0200
Subject: [PATCH 27/36] docs: fixed non-ASCII char + document

---
 R/bibentries.R          | 2 +-
 man/mlr_pipeops_umap.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/bibentries.R b/R/bibentries.R
index a78b6103a..09c994452 100644
--- a/R/bibentries.R
+++ b/R/bibentries.R
@@ -61,7 +61,7 @@ bibentries = c(
     publisher  = "The Open Journal",
     volume     = "3",
     number     = "29",
-    author     = "Leland McInnes and John Healy and James Melville and Lukas Großberger",
+    author     = "Leland McInnes and John Healy and James Melville and Lukas Grossberger",
     title      = "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction",
     journal    = "Journal of Open Source Software"
   )
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index d57694cbb..28d0f05c9 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -255,7 +255,7 @@ pop$state
 \dontshow{ \} }
 }
 \references{
-McInnes L, Healy J, Melville J, Großberger L (2018).
+McInnes L, Healy J, Melville J, Grossberger L (2018).
 \dQuote{UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction.}
 \emph{Journal of Open Source Software}, \bold{3}(29).
 \doi{10.21105/joss.00861}.

From 903c4fc52276ef892cd49f56ef81a597e17847e1 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Wed, 7 Aug 2024 21:20:24 +0200
Subject: [PATCH 28/36] fix test failures due to crate

---
 R/PipeOpUMAP.R          | 14 ++++++++------
 man/mlr_pipeops_umap.Rd |  1 +
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index b609dea07..9bd224fa1 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -219,6 +219,9 @@
 #' @section Methods:
 #' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`].
 #'
+#' @references
+#' `r format_bib("mcinnes_2018")`
+#'
 #' @examples
 #' \dontshow{ if (requireNamespace("uwot")) \{ }
 #' library("mlr3")
@@ -231,8 +234,7 @@
 #'
 #' pop$state
 #' \dontshow{ \} }
-#' @references
-#' `r format_bib("mcinnes_2018")`
+#'
 #' @family PipeOps
 #' @template seealso_pipeopslist
 #' @include PipeOpTaskPreproc.R
@@ -260,10 +262,10 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         init = p_uty(
           default = "spectral",
           tags = c("train", "umap"),
-          custom_check = crate(function(x) {
+          custom_check = function(x) {
             choices = c("spectral", "normlaplacian", "random", "lvrandom", "laplacian", "pca", "spca", "agspectral")
             check_choice(x, choices) %check||% check_matrix(x)
-          })
+          }
         ),
         init_sdev = p_uty(default = "range", tags = c("train", "umap")),
         spread = p_dbl(default = 1, tags = c("train", "umap")),
@@ -286,13 +288,13 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         y = p_uty(
           default = NULL,
           tags = c("train", "umap"),
-          custom_check = crate(function(x) {
+          custom_check = function(x) {
             check_atomic_vector(x) %check||%
               check_matrix(x) %check||%
               check_data_frame(x) %check||%
               check_list(x) %check||%
               check_null(x)
-          })
+          }
         ),
         target_n_neighbors = p_int(tags = c("train", "umap")),
         target_metric =  p_fct(
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index 28d0f05c9..ea177bebb 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -253,6 +253,7 @@ pop$train(list(task))[[1]]$data()
 
 pop$state
 \dontshow{ \} }
+
 }
 \references{
 McInnes L, Healy J, Melville J, Grossberger L (2018).

From 8be8cdddb3c4fc63b416a1b397dfdeb4ac004281 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Wed, 7 Aug 2024 21:32:55 +0200
Subject: [PATCH 29/36] docs: document + missing changes in master/man

---
 R/PipeOpUMAP.R              | 1 -
 man/mlr_pipeops_nmf.Rd      | 2 +-
 man/mlr_pipeops_rowapply.Rd | 6 +++---
 man/mlr_pipeops_umap.Rd     | 1 -
 4 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 9bd224fa1..c0cf6c190 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -234,7 +234,6 @@
 #'
 #' pop$state
 #' \dontshow{ \} }
-#'
 #' @family PipeOps
 #' @template seealso_pipeopslist
 #' @include PipeOpTaskPreproc.R
diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd
index 36a726258..ce74b22e5 100644
--- a/man/mlr_pipeops_nmf.Rd
+++ b/man/mlr_pipeops_nmf.Rd
@@ -96,7 +96,7 @@ See \code{\link[NMF:nmf]{nmf()}}.
 
 \section{Internals}{
 
-Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis]{basis()}}, \code{\link[NMF:coef]{coef()}} and
+Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis-coef-methods]{basis()}}, \code{\link[NMF:basis-coef-methods]{coef()}} and
 \code{\link[MASS:ginv]{ginv()}}.
 }
 
diff --git a/man/mlr_pipeops_rowapply.Rd b/man/mlr_pipeops_rowapply.Rd
index 85e0ac30e..e57437129 100644
--- a/man/mlr_pipeops_rowapply.Rd
+++ b/man/mlr_pipeops_rowapply.Rd
@@ -46,14 +46,13 @@ Function to apply to each row in the affected columns of the task.
 The return value should be a vector of the same length for every input.
 Initialized as \code{\link[base:identity]{identity()}}.
 \item \code{col_prefix} :: \code{character(1)}\cr
-If specified, prefix to be prepended to the column names of affected columns, separated by a dot (\code{.}). Default is \code{""}.
+If specified, prefix to be prepended to the column names of affected columns, separated by a dot (\code{.}). Initialized as \code{""}.
 }
 }
 
 \section{Internals}{
 
-Calls \code{\link{apply}} on the data, using the value of \code{applicator} as \code{FUN} and \code{simplify = TRUE}, then coerces the output via
-\code{\link[data.table:as.data.table]{as.data.table()}}.
+Calls \code{\link{apply}} on the data, using the value of \code{applicator} as \code{FUN}.
 }
 
 \section{Fields}{
@@ -144,6 +143,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_textvectorizer}},
 \code{\link{mlr_pipeops_threshold}},
 \code{\link{mlr_pipeops_tunethreshold}},
+\code{\link{mlr_pipeops_umap}},
 \code{\link{mlr_pipeops_unbranch}},
 \code{\link{mlr_pipeops_updatetarget}},
 \code{\link{mlr_pipeops_vtreat}},
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index ea177bebb..28d0f05c9 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -253,7 +253,6 @@ pop$train(list(task))[[1]]$data()
 
 pop$state
 \dontshow{ \} }
-
 }
 \references{
 McInnes L, Healy J, Melville J, Grossberger L (2018).

From daf28df2a0dc2ab24dcea4f2d6ccc9e92017e5d1 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Thu, 8 Aug 2024 10:08:11 +0200
Subject: [PATCH 30/36] fix: crate workaround

---
 R/PipeOpUMAP.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index c0cf6c190..018098775 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -261,10 +261,10 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         init = p_uty(
           default = "spectral",
           tags = c("train", "umap"),
-          custom_check = function(x) {
+          custom_check = crate(function(x) {
             choices = c("spectral", "normlaplacian", "random", "lvrandom", "laplacian", "pca", "spca", "agspectral")
             check_choice(x, choices) %check||% check_matrix(x)
-          }
+          }, .parent = topenv())
         ),
         init_sdev = p_uty(default = "range", tags = c("train", "umap")),
         spread = p_dbl(default = 1, tags = c("train", "umap")),
@@ -287,13 +287,13 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         y = p_uty(
           default = NULL,
           tags = c("train", "umap"),
-          custom_check = function(x) {
+          custom_check = crate(function(x) {
             check_atomic_vector(x) %check||%
               check_matrix(x) %check||%
               check_data_frame(x) %check||%
               check_list(x) %check||%
               check_null(x)
-          }
+          }, .parent = topenv())
         ),
         target_n_neighbors = p_int(tags = c("train", "umap")),
         target_metric =  p_fct(

From 394eb701a05b43f226cf43e4d7639663fe9f43a9 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Wed, 14 Aug 2024 17:00:01 +0200
Subject: [PATCH 31/36] Added depends to params + document

---
 R/PipeOpUMAP.R          | 64 ++++++++++++++++++++++++++++-------------
 man/mlr_pipeops_umap.Rd |  2 +-
 2 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 018098775..99f04905b 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -168,7 +168,7 @@
 #' * `pca_center` :: `logical(1)`\cr
 #'   If `TRUE`, center the columns of X before carrying out PCA. Default is `TRUE`.
 #'   For details, see [uwot::umap2()].
-#' * `pca_rand` :: `logical(1)`\cr
+#' * `pcg_rand` :: `logical(1)`\cr
 #'   If `TRUE`, use the PCG random number generator (O'Neill, 2014) during optimization.
 #'   Otherwise, use the faster (but probably less statistically good) Tausworthe "taus88" generator.
 #'   Default is `TRUE`. For details, see [uwot::umap2()].
@@ -243,8 +243,8 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
   public = list(
     initialize = function(id = "umap", param_vals = list()) {
       ps = ps(
-        n_neighbors = p_int(2L, 100L, default = 15L, tags = c("train", "umap")),
-        n_components = p_int(1L, 100L, default = 2L, tags = c("train", "umap")),
+        n_neighbors = p_int(lower = 1L, default = 15L, tags = c("train", "umap")),
+        n_components = p_int(lower = 1L, default = 2L, tags = c("train", "umap")),
         metric = p_fct(
           levels = c(
             "euclidean", "cosine", "manhattan", "hamming", "correlation",
@@ -255,8 +255,8 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
           default = "euclidean",
           tags = c("train", "umap")
         ),
-        n_epochs = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
-        learning_rate = p_dbl(0, default = 1, tags = c("train", "umap")),
+        n_epochs = p_int(lower = 1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
+        learning_rate = p_dbl(lower = 0, default = 1, tags = c("train", "umap")),
         scale = p_lgl(default = FALSE, special_vals = list("none", "Z", "scale", "maxabs", "range", "colrange", NULL), tags = c("train", "umap")),
         init = p_uty(
           default = "spectral",
@@ -266,11 +266,11 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
             check_choice(x, choices) %check||% check_matrix(x)
           }, .parent = topenv())
         ),
-        init_sdev = p_uty(default = "range", tags = c("train", "umap")),
+        init_sdev = p_dbl(default = "range", special_vals = list("range"), tags = c("train", "umap")),
         spread = p_dbl(default = 1, tags = c("train", "umap")),
         min_dist = p_dbl(default = 0.01, tags = c("train", "umap")),
-        set_op_mix_ratio = p_dbl(0, 1, default = 1, tags = c("train", "umap")),
-        local_connectivity = p_dbl(1, default = 1, tags = c("train", "umap")),
+        set_op_mix_ratio = p_dbl(lower = 0, upper = 1, default = 1, tags = c("train", "umap")),
+        local_connectivity = p_dbl(lower = 1, default = 1, tags = c("train", "umap")),
         bandwidth = p_dbl(default = 1, tags = c("train", "umap")),
         repulsion_strength = p_dbl(default = 1, tags = c("train", "umap")),
         negative_sample_rate = p_dbl(default = 5, tags = c("train", "umap")),
@@ -281,8 +281,9 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
           tags = c("train", "umap"),
           custom_check = crate(function(x) check_choice(x, c("annoy", "hnsw", "nndescent"), null.ok = TRUE))
         ),
-        n_trees = p_int(10L, 100L, default = 50L, tags = c("train", "umap")),
-        search_k = p_int(tags = c("train", "umap")),
+        n_trees = p_int(lower = 1L, default = 50L, tags = c("train", "umap"), depends = quote(nn_method == "annoy")),
+        search_k = p_int(tags = c("train", "umap"), depends = quote(nn_method == "annoy")),
+        # approx_pow is only used if dens_scale is non-NULL
         approx_pow = p_lgl(default = FALSE, tags = c("train", "umap")),
         y = p_uty(
           default = NULL,
@@ -291,11 +292,13 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
             check_atomic_vector(x) %check||%
               check_matrix(x) %check||%
               check_data_frame(x) %check||%
-              check_list(x) %check||%
+              check_list(x, len = 2, names = "unique") %check||%
               check_null(x)
           }, .parent = topenv())
         ),
+        # target_n_neighbors is only used if y is non-NULL and numeric
         target_n_neighbors = p_int(tags = c("train", "umap")),
+        # target_metric is only used if y is non-NULL and numeric
         target_metric =  p_fct(
           levels = c(
             "euclidean", "cosine", "manhattan", "hamming", "correlation",
@@ -306,27 +309,48 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
           default = "euclidean",
           tags = c("train", "umap")
         ),
-        target_weight = p_dbl(0, 1, default = 0.5, tags = c("train", "umap")),
-        pca = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
+        # target_weight is only used if y is non-NULL
+        target_weight = p_dbl(lower = 0, upper = 1, default = 0.5, tags = c("train", "umap")),
+        # pca is ignored if metric is "hamming"
+        pca = p_int(lower = 1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap"),
+                    depends = quote(metric %in% c(
+                      "euclidean", "cosine", "manhattan", "correlation",
+                      "braycurtis", "canberra", "chebyshev", "dice", "hellinger", "jaccard",
+                      "jensenshannon", "kulsinski", "rogerstanimoto", "russellrao", "sokalmichener",
+                      "sokalsneath", "spearmanr", "symmetrickl", "tsss", "yule"
+                    ))),
+        # pca_center might only be used if pca is specified (documentation unclear)
         pca_center = p_lgl(default = TRUE, tags = c("train", "umap")),
-        pca_rand = p_lgl(default = TRUE, tags = c("train", "umap")),
+        pcg_rand = p_lgl(default = TRUE, tags = c("train", "umap")),
         fast_sgd = p_lgl(default = FALSE, tags = c("train", "umap")),
-        n_threads = p_int(1L, default = NULL, special_vals = list(NULL), tags = c("train", "predict", "umap")),
-        n_sgd_threads = p_int(0L, default = 0L, special_vals = list("auto"), tags = c("train", "predict", "umap")),
-        grain_size = p_int(1L, default = 1L, tags = c("train", "umap")),
+        n_threads = p_int(lower = 1L, default = NULL, special_vals = list(NULL), tags = c("train", "predict", "umap")),
+        n_sgd_threads = p_int(lower = 0L, default = 0L, special_vals = list("auto"), tags = c("train", "predict", "umap")),
+        grain_size = p_int(lower = 1L, default = 1L, tags = c("train", "umap")),
         verbose = p_lgl(default = TRUE, tags = c("train", "umap")),
         batch = p_lgl(default = FALSE, tags = c("train", "umap")),
-        opt_args = p_uty(default = NULL, tags = c("train", "umap"), custom_check = crate(function(x) check_list(x, null.ok = TRUE))),
+        opt_args = p_uty(
+          default = NULL,
+          tags = c("train", "umap"),
+          custom_check = crate(function(x) check_list(x, types = c("numeric", "character"), min.len = 1, max.len = 5,
+                                                      names = "unique", null.ok = TRUE)),
+          depends = quote(batch == TRUE)
+        ),
         epoch_callback = p_uty(
           default = NULL,
           tags = c("train", "umap"),
           custom_check = crate(function(x) check_function(x, args = c("epochs", "n_epochs", "coords"), null.ok = TRUE))
         ),
+        # pca_method is only used if pca is specified
         pca_method = p_fct(c("irlba", "rsvd", "bigstatsr", "svd", "auto"), default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         binary_edge_weights = p_lgl(default = FALSE, tags = c("train", "umap")),
-        dens_scale = p_dbl(0, 1, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
+        dens_scale = p_dbl(lower = 0, upper = 1, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         seed = p_int(default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
-        nn_args = p_uty(default = NULL, tags = c("train", "umap"), custom_check = crate(function(x) check_list(x, null.ok = TRUE)))
+        nn_args = p_uty(
+          default = NULL,
+          tags = c("train", "umap"),
+          custom_check = crate(function(x) check_list(x, types = c("integer", "numeric", "character"),
+                                                      min.len = 1, max.len = 8, names = "unique", null.ok = TRUE))
+        )
       )
       ps$set_values(verbose = FALSE)
 
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index 28d0f05c9..69ebd3124 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -182,7 +182,7 @@ For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca_center} :: \code{logical(1)}\cr
 If \code{TRUE}, center the columns of X before carrying out PCA. Default is \code{TRUE}.
 For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
-\item \code{pca_rand} :: \code{logical(1)}\cr
+\item \code{pcg_rand} :: \code{logical(1)}\cr
 If \code{TRUE}, use the PCG random number generator (O'Neill, 2014) during optimization.
 Otherwise, use the faster (but probably less statistically good) Tausworthe "taus88" generator.
 Default is \code{TRUE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.

From 53ae9a7adac1b2bbb8923bf27a4e6b7493e7f467 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Wed, 14 Aug 2024 17:49:00 +0200
Subject: [PATCH 32/36] Added preproc tests + updated DESCRIPTION

---
 DESCRIPTION                       |  3 +++
 tests/testthat/test_pipeop_umap.R | 27 ++++++++++++++++++---------
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 8357e3b26..52c168942 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -89,6 +89,9 @@ Suggests:
     future,
     htmlwidgets,
     uwot (>= 0.2.1),
+    RcppAnnoy,
+    RcppHNSW,
+    rnndescent,
     ranger
 ByteCompile: true
 Encoding: UTF-8
diff --git a/tests/testthat/test_pipeop_umap.R b/tests/testthat/test_pipeop_umap.R
index 0a389f88b..f1883589e 100644
--- a/tests/testthat/test_pipeop_umap.R
+++ b/tests/testthat/test_pipeop_umap.R
@@ -2,17 +2,23 @@ context("PipeOpUMAP")
 
 test_that("PipeOpUMAP - basic properties", {
   skip_if_not_installed("uwot")
-  op = PipeOpUMAP$new()
-  task = mlr_tasks$get("iris")$filter(1:30)
+  skip_if_not_installed("RcppAnnoy")
+  skip_if_not_installed("RcppHNSW")
+  skip_if_not_installed("rnndescent")
 
-  expect_pipeop(op)
+  task = mlr_tasks$get("iris")$filter(1:30)
 
-  expect_task(op$train(list(task))[[1]])
-  expect_task(op$predict(list(task))[[1]])
+  # Test for different nn_methods since they are relying on different packages and deep clone is impleneted differently
+  expect_datapreproc_pipeop_class(PipeOpUMAP, constargs = list(param_vals = list(nn_method = "annoy")),
+                                  deterministic_train = FALSE, deterministic_predict = FALSE, task = task)
+  expect_datapreproc_pipeop_class(PipeOpUMAP, constargs = list(param_vals = list(nn_method = "hnsw")),
+                                  deterministic_train = FALSE, deterministic_predict = FALSE, task = task)
+  expect_datapreproc_pipeop_class(PipeOpUMAP, constargs = list(param_vals = list(nn_method = "nndescent")),
+                                  deterministic_train = FALSE, deterministic_predict = FALSE, task = task)
 
 })
 
-test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Default Params", {
+test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Default Params, nn_method = annoy", {
   skip_if_not_installed("uwot")
   task = mlr_tasks$get("iris")$filter(1:30)
 
@@ -27,7 +33,7 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Default
                   "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
                   "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
   expect_true(all(state_names %in% names(op$state)))
-  state_names_wo_pointers = setdiff(state_names, "nn_index") #  since pointers in element 1 will not be equal
+  state_names_wo_pointers = setdiff(state_names, "nn_index") #  since address in state$nn_index$ann will not be equal
   expect_identical(op$state[state_names_wo_pointers], umap_out[state_names_wo_pointers])
   expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
 
@@ -38,7 +44,7 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Default
 })
 
 
-test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed Params", {
+test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed Params, nn_method = annoy", {
   skip_if_not_installed("uwot")
   task = mlr_tasks$get("iris")$filter(1:30)
 
@@ -69,7 +75,7 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
                   "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
                   "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
   expect_true(all(state_names %in% names(op$state)))
-  state_names = setdiff(state_names, "nn_index") #  since pointers in state$nn_index$element1 will not be equal
+  state_names = setdiff(state_names, "nn_index") #  since address in state$nn_index$ann will not be equal
   expect_identical(op$state[state_names], umap_out[state_names])
   expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
 
@@ -78,3 +84,6 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
   expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out))
 
 })
+
+# weitere tests für nn_methods
+# for these use options that are specific to that method

From 2eef42dc417b102ba9d8058c48c599f96ca8bc55 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Wed, 14 Aug 2024 17:49:33 +0200
Subject: [PATCH 33/36] Add deep_clone, currently WIP

---
 R/PipeOpUMAP.R | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index 99f04905b..ff945b913 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -368,6 +368,26 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
     .predict_dt = function(dt, levels) {
       params = self$param_set$get_values(tags = c("umap", "predict"))
       invoke(uwot::umap_transform, dt, self$state, .args = params)
+    },
+
+    # We need to overload deep_clone since state$nn_index$ann is a C++ address if nn_method is "annoy" or "hnsw"
+    deep_clone = function(name, value) {
+      if (name == "state" && "NO_OP" %nin% class(value)) {
+        # TODO: Make sure these class names are correct for different options for nn_args
+        # attr(attr(value$nn_index, "class"), "package") might work otherwise
+        if (class(value$nn_index$ann) %in% c("RcppHNSWL2", "Rcpp_AnnoyEuclidean")) {
+          state = value
+          state$nn_index$ann = value$nn_index$ann$copy()
+          state$nn_index$type = value$nn_index$type
+          state$nn_index$metric = value$nn_index$metric
+          state$nn_index$ndim = value$nn_index$ndim
+          state
+        } else {
+          super$deep_clone(name, value)
+        }
+      } else {
+        super$deep_clone(name, value)
+      }
     }
   )
 )

From d854eaefd805c6e57efd4496b5f8285f07da79a7 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Tue, 20 Aug 2024 19:11:35 +0200
Subject: [PATCH 34/36] added tests for different nn_methods

---
 tests/testthat/test_pipeop_umap.R | 93 ++++++++++++++++++++++++++++++-
 1 file changed, 90 insertions(+), 3 deletions(-)

diff --git a/tests/testthat/test_pipeop_umap.R b/tests/testthat/test_pipeop_umap.R
index f1883589e..94d766365 100644
--- a/tests/testthat/test_pipeop_umap.R
+++ b/tests/testthat/test_pipeop_umap.R
@@ -8,7 +8,7 @@ test_that("PipeOpUMAP - basic properties", {
 
   task = mlr_tasks$get("iris")$filter(1:30)
 
-  # Test for different nn_methods since they are relying on different packages and deep clone is impleneted differently
+  # Test for different nn_methods since they are relying on different packages and deep clone is implemented differently
   expect_datapreproc_pipeop_class(PipeOpUMAP, constargs = list(param_vals = list(nn_method = "annoy")),
                                   deterministic_train = FALSE, deterministic_predict = FALSE, task = task)
   expect_datapreproc_pipeop_class(PipeOpUMAP, constargs = list(param_vals = list(nn_method = "hnsw")),
@@ -46,6 +46,7 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Default
 
 test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed Params, nn_method = annoy", {
   skip_if_not_installed("uwot")
+  skip_if_not_installed("RcppAnnoy")
   task = mlr_tasks$get("iris")$filter(1:30)
 
   op = PipeOpUMAP$new()
@@ -85,5 +86,91 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
 
 })
 
-# weitere tests für nn_methods
-# for these use options that are specific to that method
+
+test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed Params, nn_method = hnsw", {
+  skip_if_not_installed("uwot")
+  skip_if_not_installed("RcppHNSW")
+  task = mlr_tasks$get("iris")$filter(1:30)
+
+  op = PipeOpUMAP$new()
+  pv = list(
+    seed = 1234L,
+    nn_method = "hnsw",
+    n_neighbors = 10L,
+    metric = "correlation",
+    n_epochs = 100L,
+    learning_rate = 0.5,
+    scale = FALSE,
+    init = "pca",
+    init_sdev = 1e-4,
+    set_op_mix_ratio = 0.5,
+    local_connectivity = 1.1,
+    bandwidth = 0.9,
+    repulsion_strength = 1.1,
+    negative_sample_rate = 6,
+    y = task$data()[, 1],
+    nn_args = list(M = 10L, ef_construction = 100L, ef = 20L)
+  )
+  op$param_set$set_values(.values = pv)
+
+  train_out = train_pipeop(op, list(task))[[1L]]
+  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], ret_model = TRUE, .args = pv)
+
+  state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b",
+                  "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
+                  "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
+  expect_true(all(state_names %in% names(op$state)))
+  state_names = setdiff(state_names, "nn_index") #  since address in state$nn_index$ann will not be equal
+  expect_identical(op$state[state_names], umap_out[state_names])
+  expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
+
+  predict_out = predict_pipeop(op, list(task))[[1L]]
+  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out)
+  expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out))
+
+})
+
+
+test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed Params, nn_method = rnndescent", {
+  skip_if_not_installed("uwot")
+  skip_if_not_installed("rnndescent")
+  task = mlr_tasks$get("iris")$filter(1:30)
+
+  op = PipeOpUMAP$new()
+  pv = list(
+    seed = 1234L,
+    nn_method = "nndescent",
+    n_neighbors = 10L,
+    metric = "symmetrickl",
+    n_epochs = 100L,
+    learning_rate = 0.5,
+    scale = FALSE,
+    init = "pca",
+    init_sdev = 1e-4,
+    set_op_mix_ratio = 0.5,
+    local_connectivity = 1.1,
+    bandwidth = 0.9,
+    repulsion_strength = 1.1,
+    negative_sample_rate = 6,
+    y = task$data()[, 1],
+    nn_args = list(n_trees = 15L, max_candidates = 15L, pruning_degree_multiplier = 1.4, epsilon = 0.05)
+  )
+  op$param_set$set_values(.values = pv)
+
+  train_out = train_pipeop(op, list(task))[[1L]]
+  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], ret_model = TRUE, .args = pv)
+
+  state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b",
+                  "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
+                  "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
+  expect_true(all(state_names %in% names(op$state)))
+
+  state_names = setdiff(state_names, "nn_index") #  since address in state$nn_index$ann will not be equal
+  expect_identical(op$state[state_names], umap_out[state_names])
+  expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
+
+  predict_out = predict_pipeop(op, list(task))[[1L]]
+  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out)
+  expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out))
+
+})

From e1b113eeb67cdbab8d17c9a492c3ebd676fec706 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Fri, 23 Aug 2024 16:49:43 +0200
Subject: [PATCH 35/36] feat: params for transform + better param defs + docs

---
 R/PipeOpUMAP.R          | 175 +++++++++++++++++++++++++---------------
 man/mlr_pipeops_umap.Rd |  58 +++++++++----
 2 files changed, 151 insertions(+), 82 deletions(-)

diff --git a/R/PipeOpUMAP.R b/R/PipeOpUMAP.R
index ff945b913..49798bbb2 100644
--- a/R/PipeOpUMAP.R
+++ b/R/PipeOpUMAP.R
@@ -107,9 +107,11 @@
 #'   For details, see [uwot::umap2()].
 #' * `scale` :: `logical(1)` / `character(1)`\cr
 #'   Scaling to apply to the data. If `TRUE`, data is standardized. Default is `FALSE`. For details, see [uwot::umap2()].
-#' * `init` :: `character(1)` | `matrix`\cr
-#'   Type of initialization for the coordinates. Default is `"spectral"`.
-#'   For details, see [uwot::umap2()].
+#' * `init` :: `character(1)`\cr
+#'   Type of initialization for the coordinates. May be set to `"custom"`, in which case the `matrix` of initial
+#'   coordinates passed to `init_custom` is used. Default is `"spectral"`. For details, see [uwot::umap2()].
+#' * `init_custom` :: `matrix`\cr
+#'   Matrix of initial coordinates. Only used if `init` is `"custom"`.
 #' * `init_sdev` :: `character(1)` | `numeric(1)`\cr
 #'   Scales each dimension of the initialized coordinates to this standard deviation.
 #'   Default is `"range"`. For details, see [uwot::umap2()].
@@ -134,9 +136,9 @@
 #' * `negative_sample_rate` :: `numeric(1)`\cr
 #'   The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample
 #'   in optimizing the low dimensional embedding. Default is `5`. For details, see [uwot::umap2()].
-#' * `a` :: `any`\cr
+#' * `a` :: `numeric(1)`\cr
 #'   More specific parameters controlling the embedding. Default is `NULL`. For details, see [uwot::umap2()].
-#' * `b` :: `any`\cr
+#' * `b` :: `numeric(1)`\cr
 #'   More specific parameters controlling the embedding. Default is `NULL`. For details, see [uwot::umap2()].
 #' * `nn_method` :: `character(1)`\cr
 #'   Method for finding nearest neighbors. Note that only values compatible with [uwot::umap_transform()] are allowed.
@@ -150,20 +152,20 @@
 #' * `approx_pow` :: `logical(1)`\cr
 #'   If `TRUE`, use an approximation to the power function in the UMAP gradient. Default is `FALSE`.
 #'   For details, see [uwot::umap2()].
-#' * `y` :: `any`\cr
-#'   Optional target data for supervised dimension reduction. Default is `NULL`.
-#'   For details, see [uwot::umap2()].
+#' * `use_supervised` :: `logical(1)`\cr
+#'   If `TRUE`, perform supervised dimension reduction. This is done by passing the task's target to [uwot::umap2()]'s `y` argument.
+#'   For details, see there. Initialized to `FALSE`.
 #' * `target_n_neighbors` :: `integer(1)`\cr
-#'   Number of nearest neighbors to use to construct the target simplicial set. Default is `n_neighbors`.
-#'   For details, see [uwot::umap2()].
+#'   Number of nearest neighbors to use to construct the target simplicial set. Only used when performing supervised dimension reduction.
+#'   Default is `n_neighbors`. For details, see [uwot::umap2()].
 #' * `target_metric` :: `character(1)`\cr
-#'   The metric used to measure distance for `y` if using supervised dimension reduction.
+#'   The metric used to measure distance for the task's target when performing supervised dimension reduction.
 #'   For details, see [uwot::umap2()].
 #' * `target_weight` :: `numeric(1)`\cr
-#'   Weighting factor between data topology and target topology. Default is `0.5`.
-#'   For details, see [uwot::umap2()].
+#'   Weighting factor between data topology and target topology. Only used when performing supervised dimension reduction.
+#'   Default is `0.5`. For details, see [uwot::umap2()].
 #' * `pca` :: `integer(1)`\cr
-#'   Redude data to this number of columns using PCA. Default is `NULL`.
+#'   Reduce data to this number of columns using PCA. Default is `NULL`.
 #'   For details, see [uwot::umap2()].
 #' * `pca_center` :: `logical(1)`\cr
 #'   If `TRUE`, center the columns of X before carrying out PCA. Default is `TRUE`.
@@ -187,10 +189,10 @@
 #'   The minimum amount of work to do on each thread. Default is `1`.
 #'   For details, see [uwot::umap2()].
 #' * `verbose` :: `logical(1)`\cr
-#'   Should details be printed? Initialzed to `FALSE`. For details, see [uwot::umap2()].
+#'   Should details be printed? Initialized to `FALSE`. For details, see [uwot::umap2()].
 #' * `batch` :: `logical(1)`\cr
 #'   If `TRUE`, then embedding coordinates are updated at the end of each epoch rather
-#'   than during the epoch. Default is `FALSE`. For details, see [uwot::umap2()].
+#'   than during the epoch. Default is `TRUE`. For details, see [uwot::umap2()].
 #' * `opt_args` :: named `list()`\cr
 #'   A list of optimizer parameters, used when `batch = TRUE`. Default is `NULL`.
 #'   For details, see [uwot::umap2()].
@@ -213,6 +215,28 @@
 #'   A list containing additional arguments to pass to the nearest neighbor method.
 #'   Default is `NULL`. For details, see [uwot::umap2()].
 #'
+#' Additionally, there are several parameters that may be used to overwrite parameter values for prediction:
+#' * `search_k_transform` :: `integer(1)`\cr
+#'   Number of nodes to search during the neighbor retrieval when predicting.
+#'   Only used if `nn_method` is `"annoy"`. If `NULL`, `search_k` is used instead. Default is `NULL`. For details, see [uwot::umap_transform()].
+#' * `n_epochs_transform` :: `integer(1)`\cr
+#'   Number of epochs used during the optimization of the embedded coordinates when predicting.
+#'   If `NULL`, `n_epochs` is used instead. Default is `NULL`. For details, see [uwot::umap_transform()].
+#' * `init_transform` :: `character(1)`\cr
+#'   Type of initialization for the coordinates when predicting. May be set to `"custom"`, in which case the `matrix` of initial
+#'   coordinates passed to `init_transform_custom` is used. Default is `"weighted"`. For details, see [uwot::umap_transform()].
+#' * `init_transform_custom` :: `matrix`\cr
+#'   Matrix of initial coordinates when predicting. Only used if `init_transform` is `"custom"`.
+#' * `batch_transform` :: `logical(1)`\cr
+#'   If `TRUE`, embedding coordinates are updated at the end of each epoch rather than during the epoch when predicting.
+#'   If `NULL`, `batch` is used instead. Default is `FALSE`. For details, see [uwot::umap_transform()].
+#' * `learning_rate_transform` :: `numeric(1)`\cr
+#'   Initial learning rate used in optimization of the coordinates when predicting.
+#'   If `NULL`, `learning_rate` is used instead. Default is `NULL`. For details, see [uwot::umap_transform()].
+#' * `epoch_callback_transform` :: `function`\cr
+#'   A function which will be invoked at the end of every epoch when predicting.
+#'   Default is `NULL`. For details, see [uwot::umap_transform()].
+#'
 #' @section Internals:
 #' Uses the [umap2()][uwot::umap2] function.
 #'
@@ -257,15 +281,19 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         ),
         n_epochs = p_int(lower = 1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         learning_rate = p_dbl(lower = 0, default = 1, tags = c("train", "umap")),
-        scale = p_lgl(default = FALSE, special_vals = list("none", "Z", "scale", "maxabs", "range", "colrange", NULL), tags = c("train", "umap")),
-        init = p_uty(
+        scale = p_fct(
+          levels = c("none", "scale", "maxabs", "range", "colrange"),
+          special_vals = list(FALSE, NULL, "Z", TRUE),
+          default = FALSE,
+          tags = c("train", "umap")
+        ),
+        init = p_fct(
+          levels = c("spectral", "normlaplacian", "random", "lvrandom", "laplacian", "pca", "spca", "agspectral"),
+          special_vals = list("custom"),
           default = "spectral",
-          tags = c("train", "umap"),
-          custom_check = crate(function(x) {
-            choices = c("spectral", "normlaplacian", "random", "lvrandom", "laplacian", "pca", "spca", "agspectral")
-            check_choice(x, choices) %check||% check_matrix(x)
-          }, .parent = topenv())
+          tags = c("train", "umap")
         ),
+        init_custom = p_uty(custom_check = check_matrix, tags = "train", depends = quote(init == "custom")),
         init_sdev = p_dbl(default = "range", special_vals = list("range"), tags = c("train", "umap")),
         spread = p_dbl(default = 1, tags = c("train", "umap")),
         min_dist = p_dbl(default = 0.01, tags = c("train", "umap")),
@@ -274,31 +302,15 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
         bandwidth = p_dbl(default = 1, tags = c("train", "umap")),
         repulsion_strength = p_dbl(default = 1, tags = c("train", "umap")),
         negative_sample_rate = p_dbl(default = 5, tags = c("train", "umap")),
-        a = p_uty(default = NULL, tags = c("train", "umap")),
-        b = p_uty(default = NULL, tags = c("train", "umap")),
-        nn_method = p_uty(
-          default = NULL,
-          tags = c("train", "umap"),
-          custom_check = crate(function(x) check_choice(x, c("annoy", "hnsw", "nndescent"), null.ok = TRUE))
-        ),
+        a = p_dbl(default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
+        b = p_dbl(default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
+        nn_method = p_fct(levels = c("annoy", "hnsw", "nndescent"), default = NULL, special_vals = list(NULL), tags = c("train", "umap")),
         n_trees = p_int(lower = 1L, default = 50L, tags = c("train", "umap"), depends = quote(nn_method == "annoy")),
         search_k = p_int(tags = c("train", "umap"), depends = quote(nn_method == "annoy")),
         # approx_pow is only used if dens_scale is non-NULL
         approx_pow = p_lgl(default = FALSE, tags = c("train", "umap")),
-        y = p_uty(
-          default = NULL,
-          tags = c("train", "umap"),
-          custom_check = crate(function(x) {
-            check_atomic_vector(x) %check||%
-              check_matrix(x) %check||%
-              check_data_frame(x) %check||%
-              check_list(x, len = 2, names = "unique") %check||%
-              check_null(x)
-          }, .parent = topenv())
-        ),
-        # target_n_neighbors is only used if y is non-NULL and numeric
-        target_n_neighbors = p_int(tags = c("train", "umap")),
-        # target_metric is only used if y is non-NULL and numeric
+        use_supervised = p_lgl(default = FALSE, tags = c("train")),
+        target_n_neighbors = p_int(tags = c("train", "umap"), depends = quote(use_supervised == TRUE)),
         target_metric =  p_fct(
           levels = c(
             "euclidean", "cosine", "manhattan", "hamming", "correlation",
@@ -307,10 +319,10 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
             "sokalsneath", "spearmanr", "symmetrickl", "tsss", "yule"
           ),
           default = "euclidean",
-          tags = c("train", "umap")
+          tags = c("train", "umap"),
+          depends = quote(use_supervised == TRUE)
         ),
-        # target_weight is only used if y is non-NULL
-        target_weight = p_dbl(lower = 0, upper = 1, default = 0.5, tags = c("train", "umap")),
+        target_weight = p_dbl(lower = 0, upper = 1, default = 0.5, tags = c("train", "umap"), depends = quote(use_supervised == TRUE)),
         # pca is ignored if metric is "hamming"
         pca = p_int(lower = 1L, default = NULL, special_vals = list(NULL), tags = c("train", "umap"),
                     depends = quote(metric %in% c(
@@ -319,15 +331,15 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
                       "jensenshannon", "kulsinski", "rogerstanimoto", "russellrao", "sokalmichener",
                       "sokalsneath", "spearmanr", "symmetrickl", "tsss", "yule"
                     ))),
-        # pca_center might only be used if pca is specified (documentation unclear)
+        # pca_center is only used if pca is specified
         pca_center = p_lgl(default = TRUE, tags = c("train", "umap")),
         pcg_rand = p_lgl(default = TRUE, tags = c("train", "umap")),
         fast_sgd = p_lgl(default = FALSE, tags = c("train", "umap")),
         n_threads = p_int(lower = 1L, default = NULL, special_vals = list(NULL), tags = c("train", "predict", "umap")),
         n_sgd_threads = p_int(lower = 0L, default = 0L, special_vals = list("auto"), tags = c("train", "predict", "umap")),
-        grain_size = p_int(lower = 1L, default = 1L, tags = c("train", "umap")),
-        verbose = p_lgl(default = TRUE, tags = c("train", "umap")),
-        batch = p_lgl(default = FALSE, tags = c("train", "umap")),
+        grain_size = p_int(lower = 1L, default = 1L, tags = c("train", "predict", "umap")),
+        verbose = p_lgl(default = TRUE, tags = c("train", "predict", "umap")),
+        batch = p_lgl(default = TRUE, tags = c("train", "umap")),
         opt_args = p_uty(
           default = NULL,
           tags = c("train", "umap"),
@@ -350,38 +362,69 @@ PipeOpUMAP = R6Class("PipeOpUMAP",
           tags = c("train", "umap"),
           custom_check = crate(function(x) check_list(x, types = c("integer", "numeric", "character"),
                                                       min.len = 1, max.len = 8, names = "unique", null.ok = TRUE))
+        ),
+        # Parameters that are passed to umap_transform to overwrite parameters from training for prediction
+        search_k_transform = p_int(default = NULL, special_vals = list(NULL), tags = c("predict", "overwrite"), depends = quote(nn_method == "annoy")),
+        n_epochs_transform = p_int(lower = 1L, default = NULL, special_vals = list(NULL), tags = c("predict", "overwrite")),
+        init_transform = p_fct(levels = c("weighted", "average"), special_vals = list("custom"), default = "weighted", tags = c("predict", "overwrite")),
+        init_transform_custom = p_uty(custom_check = check_matrix, tags = "predict", depends = quote(init_transform == "custom")),
+        batch_transform = p_lgl(default = FALSE, special_vals = list(NULL), tags = c("predict", "overwrite")),
+        learning_rate_transform = p_dbl(default = NULL, special_vals = list(NULL), tags = c("predict", "overwrite")),
+        epoch_callback_transform = p_uty(
+          default = NULL,
+          tags = c("predict", "overwrite"),
+          custom_check = crate(function(x) check_function(x, args = c("epochs", "n_epochs", "coords", "fixed_coords"), null.ok = TRUE))
         )
       )
-      ps$set_values(verbose = FALSE)
+      ps$values = list(verbose = FALSE, use_supervised = FALSE)
 
       super$initialize(id, param_set = ps, param_vals = param_vals, packages = "uwot", feature_types = c("numeric", "integer"))
     }
   ),
   private = list(
     .train_dt = function(dt, levels, target) {
-      params = insert_named(self$param_set$get_values(tags = c("umap", "train")), list(ret_model = TRUE))
-      umap = invoke(uwot::umap2, dt, .args = params)
+      pv = self$param_set$values
+      pv_args = self$param_set$get_values(tags = c("umap", "train"))
+      # Indicate that umap2() should return the full model which we need for prediction
+      pv_args = insert_named(pv_args, list(ret_model = TRUE))
+      # Use target for supervised dimension reduction when specified
+      if (!is.null(pv$use_supervised) && pv$use_supervised) {
+        pv_args = insert_named(pv_args, list(y = target))
+      }
+      # Use matrix passed to init_custom for initialization when specified
+      if (!is.null(pv$init) && pv$init == "custom") {
+        pv_args = insert_named(pv_args, list(init = pv$init_custom))
+      }
+      umap = invoke(uwot::umap2, dt, .args = pv_args)
       self$state = umap
       umap$embedding
     },
 
     .predict_dt = function(dt, levels) {
-      params = self$param_set$get_values(tags = c("umap", "predict"))
-      invoke(uwot::umap_transform, dt, self$state, .args = params)
+      pv = self$param_set$values
+      pv_args = self$param_set$get_values(tags = c("umap", "predict"))
+      # Get overwriting params and rename them to the correct argument names for uwot::umap_transform()
+      overwrite_pv_args = self$param_set$get_values(tags = c("overwrite", "predict"))
+      names(overwrite_pv_args) <- sub("_transform$", "", names(overwrite_pv_args))
+      pv_args = insert_named(pv_args, overwrite_pv_args)
+      # Use matrix passed to init_transform_custom for initialization when specified
+      if (!is.null(pv$init_transform) && pv$init_transform == "custom") {
+        pv_args = insert_named(pv_args, list(init = pv$init_transform_custom))
+      }
+      invoke(uwot::umap_transform, dt, self$state, .args = pv_args)
     },
 
-    # We need to overload deep_clone since state$nn_index$ann is a C++ address if nn_method is "annoy" or "hnsw"
+    # We need to overload deep_clone since state$nn_index$ann is a RefClass if nn_method is "annoy" or "hnsw"
     deep_clone = function(name, value) {
       if (name == "state" && "NO_OP" %nin% class(value)) {
-        # TODO: Make sure these class names are correct for different options for nn_args
-        # attr(attr(value$nn_index, "class"), "package") might work otherwise
-        if (class(value$nn_index$ann) %in% c("RcppHNSWL2", "Rcpp_AnnoyEuclidean")) {
-          state = value
-          state$nn_index$ann = value$nn_index$ann$copy()
-          state$nn_index$type = value$nn_index$type
-          state$nn_index$metric = value$nn_index$metric
-          state$nn_index$ndim = value$nn_index$ndim
-          state
+        if (!is.null(value$nn_index)) {
+          if (methods::is(value$nn_index$ann, "envRefClass")) {
+            state = value
+            state$nn_index$ann = value$nn_index$ann$copy()
+            state
+          } else {
+            super$deep_clone(name, value)
+          }
         } else {
           super$deep_clone(name, value)
         }
diff --git a/man/mlr_pipeops_umap.Rd b/man/mlr_pipeops_umap.Rd
index 69ebd3124..adf871539 100644
--- a/man/mlr_pipeops_umap.Rd
+++ b/man/mlr_pipeops_umap.Rd
@@ -121,9 +121,11 @@ Initial learning rate used in optimization of the coordinates. Default is \code{
 For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{scale} :: \code{logical(1)} / \code{character(1)}\cr
 Scaling to apply to the data. If \code{TRUE}, data is standardized. Default is \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
-\item \code{init} :: \code{character(1)} | \code{matrix}\cr
-Type of initialization for the coordinates. Default is \code{"spectral"}.
-For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
+\item \code{init} :: \code{character(1)}\cr
+Type of initialization for the coordinates. May be set to \code{"custom"}, in which case the \code{matrix} of initial
+coordinates passed to \code{init_custom} is used. Default is \code{"spectral"}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
+\item \code{init_custom} :: \code{matrix}\cr
+Matrix of initial coordinates. Only used, if \code{init} is \code{"custom"}.
 \item \code{init_sdev} :: \code{character(1)} | \code{numeric(1)}\cr
 Scales each dimension of the initialized coordinates to this standard deviation.
 Default is \code{"range"}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
@@ -148,9 +150,9 @@ Default is \code{1}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{negative_sample_rate} :: \code{numeric(1)}\cr
 The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample
 in optimizing the low dimensional embedding. Default is \code{5}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
-\item \code{a} :: \code{any}\cr
+\item \code{a} :: \code{numeric(1)}\cr
 More specific parameters controlling the embedding. Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
-\item \code{b} :: \code{any}\cr
+\item \code{b} :: \code{numeric(1)}\cr
 More specific parameters controlling the embedding. Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{nn_method} :: \code{character(1)}\cr
 Method for finding nearest neighbors. Note that only values compatible with \code{\link[uwot:umap_transform]{uwot::umap_transform()}} are allowed.
@@ -164,20 +166,20 @@ For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{approx_pow} :: \code{logical(1)}\cr
 If \code{TRUE}, use an approximation to the power function in the UMAP gradient. Default is \code{FALSE}.
 For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
-\item \code{y} :: \code{any}\cr
-Optional target data for supervised dimension reduction. Default is \code{NULL}.
-For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
+\item \code{use_supervised} :: \code{logical(1)}\cr
+If \code{TRUE}, perform supervised dimension reduction. This is done by passing the task's target to \code{\link[uwot:umap2]{uwot::umap2()}}'s \code{y} argument.
+For details, see there. Initialized to \code{FALSE}.
 \item \code{target_n_neighbors} :: \code{integer(1)}\cr
-Number of nearest neighbors to use to construct the target simplicial set. Default is \code{n_neighbors}.
-For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Number of nearest neighbors to use to construct the target simplicial set. Only used when performing supervised dimension reduction.
+Default is \code{n_neighbors}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{target_metric} :: \code{character(1)}\cr
-The metric used to measure distance for \code{y} if using supervised dimension reduction.
+The metric used to measure distance for the task's target when performing supervised dimension reduction.
 For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{target_weight} :: \code{numeric(1)}\cr
-Weighting factor between data topology and target topology. Default is \code{0.5}.
-For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Weighting factor between data topology and target topology. Only used when performing supervised dimension reduction.
+Default is \code{0.5}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca} :: \code{integer(1)}\cr
-Redude data to this number of columns using PCA. Default is \code{NULL}.
+Reduce data to this number of columns using PCA. Default is \code{NULL}.
 For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{pca_center} :: \code{logical(1)}\cr
 If \code{TRUE}, center the columns of X before carrying out PCA. Default is \code{TRUE}.
@@ -203,10 +205,10 @@ For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 The minimum amount of work to do on each thread. Default is \code{1}.
 For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{verbose} :: \code{logical(1)}\cr
-Should details be printed? Initialzed to \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
+Should details be printed? Initialized to \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{batch} :: \code{logical(1)}\cr
 If \code{TRUE}, then embedding coordinates are updated at the end of each epoch rather
-than during the epoch. Default is \code{FALSE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
+than during the epoch. Default is \code{TRUE}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 \item \code{opt_args} :: named \code{list()}\cr
 A list of optimizer parameters, used when \code{batch = TRUE}. Default is \code{NULL}.
 For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
@@ -229,6 +231,30 @@ Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}
 A list containing additional arguments to pass to the nearest neighbor method.
 Default is \code{NULL}. For details, see \code{\link[uwot:umap2]{uwot::umap2()}}.
 }
+
+Additionally, there are several parameters that may be used to overwrite parameter values for prediction:
+\itemize{
+\item \code{search_k_transform} :: \code{integer(1)}\cr
+Number of nodes to search during the neighbor retrieval when predicting.
+Only used if \code{nn_method} is \code{"annoy"}. If \code{NULL}, \code{search_k} is used instead. Default is \code{NULL}. For details, see \code{\link[uwot:umap_transform]{uwot::umap_transform()}}.
+\item \code{n_epochs_transform} :: \code{integer(1)}\cr
+Number of epochs used during the optimization of the embedded coordinates when predicting.
+If \code{NULL}, \code{n_epochs} is used instead. Default is \code{NULL}. For details, see \code{\link[uwot:umap_transform]{uwot::umap_transform()}}.
+\item \code{init_transform} :: \code{character(1)}\cr
+Type of initialization for the coordinates when predicting. May be set to \code{"custom"}, in which case the \code{matrix} of initial
+coordinates passed to \code{init_transform_custom} is used. Default is \code{"weighted"}. For details, see \code{\link[uwot:umap_transform]{uwot::umap_transform()}}.
+\item \code{init_transform_custom} :: \code{matrix}\cr
+Matrix of initial coordinates when predicting. Only used if \code{init_transform} is \code{"custom"}.
+\item \code{batch_transform} :: \code{logical(1)}\cr
+If \code{TRUE}, embedding coordinates are updated at the end of each epoch rather than during the epoch when predicting.
+If \code{NULL}, \code{batch} is used instead. Default is \code{FALSE}. For details, see \code{\link[uwot:umap_transform]{uwot::umap_transform()}}.
+\item \code{learning_rate_transform} :: \code{numeric(1)}\cr
+Initial learning rate used in optimization of the coordinates when predicting.
+If \code{NULL}, \code{learning_rate} is used instead. Default is \code{NULL}. For details, see \code{\link[uwot:umap_transform]{uwot::umap_transform()}}.
+\item \code{epoch_callback_transform} :: \code{function}\cr
+A function which will be invoked at the end of every epoch when predicting.
+Default is \code{NULL}. For details, see \code{\link[uwot:umap_transform]{uwot::umap_transform()}}.
+}
 }
 
 \section{Internals}{

From 28677597b74e7eb96201af31e43455627edf9ea6 Mon Sep 17 00:00:00 2001
From: kenomersmannPC <advieser@gmail.com>
Date: Fri, 23 Aug 2024 16:50:11 +0200
Subject: [PATCH 36/36] changed tests to fit new param scheme

---
 tests/testthat/test_pipeop_umap.R | 54 +++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 17 deletions(-)

diff --git a/tests/testthat/test_pipeop_umap.R b/tests/testthat/test_pipeop_umap.R
index 94d766365..cbcde57d5 100644
--- a/tests/testthat/test_pipeop_umap.R
+++ b/tests/testthat/test_pipeop_umap.R
@@ -20,6 +20,7 @@ test_that("PipeOpUMAP - basic properties", {
 
 test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Default Params, nn_method = annoy", {
   skip_if_not_installed("uwot")
+  skip_if_not_installed("RcppAnnoy")
   task = mlr_tasks$get("iris")$filter(1:30)
 
   op = PipeOpUMAP$new()
@@ -33,7 +34,7 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Default
                   "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
                   "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
   expect_true(all(state_names %in% names(op$state)))
-  state_names_wo_pointers = setdiff(state_names, "nn_index") #  since address in state$nn_index$ann will not be equal
+  state_names_wo_pointers = setdiff(state_names, "nn_index") #  since RefClass in state$nn_index$ann will not be equal
   expect_identical(op$state[state_names_wo_pointers], umap_out[state_names_wo_pointers])
   expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
 
@@ -50,6 +51,8 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
   task = mlr_tasks$get("iris")$filter(1:30)
 
   op = PipeOpUMAP$new()
+
+  # Build list of params with the same names for PipeOpUMAP and uwot::umap2() / uwot::umap_transform()
   pv = list(
     seed = 1234L,
     nn_method = "annoy",
@@ -64,24 +67,31 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
     local_connectivity = 1.1,
     bandwidth = 0.9,
     repulsion_strength = 1.1,
-    negative_sample_rate = 6,
-    y = task$data()[, 1]
+    negative_sample_rate = 6
   )
-  op$param_set$set_values(.values = pv)
+  # Handle parameters that are differently named for PipeOpUMAP and uwot::umap2() / uwot::umap_transform()
+  pv_po = insert_named(pv, list(use_supervised = TRUE,
+                                batch_transform = TRUE,
+                                init_transform = "average",
+                                search_k_transform = 1000L))
+  op$param_set$set_values(.values = pv_po)
+  args_umap2 = insert_named(pv, list(ret_model = TRUE, y = task$data()[, 1]))
+  args_umap_transform = list(init = "average", search_k = 1000L, batch = TRUE)
 
   train_out = train_pipeop(op, list(task))[[1L]]
-  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], ret_model = TRUE, .args = pv)
+  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], .args = args_umap2)
 
   state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b",
                   "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
                   "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
   expect_true(all(state_names %in% names(op$state)))
-  state_names = setdiff(state_names, "nn_index") #  since address in state$nn_index$ann will not be equal
+  state_names = setdiff(state_names, "nn_index") #  since RefClass in state$nn_index$ann will not be equal
   expect_identical(op$state[state_names], umap_out[state_names])
   expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
 
   predict_out = predict_pipeop(op, list(task))[[1L]]
-  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out)
+  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out, .args = args_umap_transform)
+
   expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out))
 
 })
@@ -93,6 +103,8 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
   task = mlr_tasks$get("iris")$filter(1:30)
 
   op = PipeOpUMAP$new()
+
+  # Build list of params with the same names for PipeOpUMAP and uwot::umap2() / uwot::umap_transform()
   pv = list(
     seed = 1234L,
     nn_method = "hnsw",
@@ -108,24 +120,27 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
     bandwidth = 0.9,
     repulsion_strength = 1.1,
     negative_sample_rate = 6,
-    y = task$data()[, 1],
     nn_args = list(M = 10L, ef_construction = 100L, ef = 20L)
   )
-  op$param_set$set_values(.values = pv)
+  # Handle parameters that are differently named for PipeOpUMAP and uwot::umap2() / uwot::umap_transform()
+  pv_po = insert_named(pv, list(use_supervised = TRUE, init_transform = "average"))
+  op$param_set$set_values(.values = pv_po)
+  args_umap2 = insert_named(pv, list(ret_model = TRUE, y = task$data()[, 1]))
+  args_umap_transform = list(init = "average")
 
   train_out = train_pipeop(op, list(task))[[1L]]
-  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], ret_model = TRUE, .args = pv)
+  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], .args = args_umap2)
 
   state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b",
                   "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
                   "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
   expect_true(all(state_names %in% names(op$state)))
-  state_names = setdiff(state_names, "nn_index") #  since address in state$nn_index$ann will not be equal
+  state_names = setdiff(state_names, "nn_index") #  since RefClass in state$nn_index$ann will not be equal
   expect_identical(op$state[state_names], umap_out[state_names])
   expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
 
   predict_out = predict_pipeop(op, list(task))[[1L]]
-  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out)
+  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out, .args = args_umap_transform)
   expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out))
 
 })
@@ -137,6 +152,8 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
   task = mlr_tasks$get("iris")$filter(1:30)
 
   op = PipeOpUMAP$new()
+
+  # Build list of params with the same names for PipeOpUMAP and uwot::umap2() / uwot::umap_transform()
   pv = list(
     seed = 1234L,
     nn_method = "nndescent",
@@ -152,25 +169,28 @@ test_that("PipeOpUMAP - Compare to uwot::umap2 and uwot::umap_transform; Changed
     bandwidth = 0.9,
     repulsion_strength = 1.1,
     negative_sample_rate = 6,
-    y = task$data()[, 1],
     nn_args = list(n_trees = 15L, max_candidates = 15L, pruning_degree_multiplier = 1.4, epsilon = 0.05)
   )
-  op$param_set$set_values(.values = pv)
+  # Handle parameters that are differently named for PipeOpUMAP and uwot::umap2() / uwot::umap_transform()
+  pv_po = insert_named(pv, list(use_supervised = TRUE, init_transform = "average"))
+  op$param_set$set_values(.values = pv_po)
+  args_umap2 = insert_named(pv, list(ret_model = TRUE, y = task$data()[, 1]))
+  args_umap_transform = list(init = "average")
 
   train_out = train_pipeop(op, list(task))[[1L]]
-  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], ret_model = TRUE, .args = pv)
+  umap_out = invoke(uwot::umap2, X = task$data()[, 2:5], .args = args_umap2)
 
   state_names = c("embedding", "scale_info", "search_k", "local_connectivity", "n_epochs", "alpha", "negative_sample_rate", "method", "a", "b",
                   "gamma", "approx_pow", "metric", "norig_col", "pcg_rand", "batch", "opt_args", "num_precomputed_nns", "min_dist", "spread",
                   "binary_edge_weights", "seed", "nn_method", "nn_args", "n_neighbors", "nn_index", "pca_models")
   expect_true(all(state_names %in% names(op$state)))
 
-  state_names = setdiff(state_names, "nn_index") #  since address in state$nn_index$ann will not be equal
+  state_names = setdiff(state_names, "nn_index") #  since RefClass in state$nn_index$ann will not be equal
   expect_identical(op$state[state_names], umap_out[state_names])
   expect_equal(train_out$data()[, 2:3], as.data.table(umap_out[["embedding"]]))
 
   predict_out = predict_pipeop(op, list(task))[[1L]]
-  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out)
+  umap_transform_out = invoke(uwot::umap_transform, X = task$data()[, 2:5], model = umap_out, .args = args_umap_transform)
   expect_equal(predict_out$data()[, 2:3], as.data.table(umap_transform_out))
 
 })