mlr-org
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 15 deletions b/‎.gitignore‎
Lines changed: 2 additions & 15 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 6 additions & 1 deletion b/‎DESCRIPTION‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎NAMESPACE‎
Lines changed: 2 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎NEWS.md‎
Lines changed: 2 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎R/Graph.R‎
Lines changed: 1 addition & 0 deletions b/‎R/Graph.R‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/PipeOpInfo.R‎
Lines changed: 167 additions & 0 deletions b/‎R/PipeOpInfo.R‎
Lines changed: 167 additions & 0 deletions
diff --git a/‎R/PipeOpIsomap.R‎
Lines changed: 97 additions & 0 deletions b/‎R/PipeOpIsomap.R‎
Lines changed: 97 additions & 0 deletions
diff --git a/‎R/zzz.R‎
Lines changed: 1 addition & 1 deletion b/‎R/zzz.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎man/PipeOp.Rd‎
Lines changed: 2 additions & 0 deletions b/‎man/PipeOp.Rd‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎man/PipeOpEncodePL.Rd‎
Lines changed: 2 additions & 0 deletions b/‎man/PipeOpEncodePL.Rd‎
Lines changed: 2 additions & 0 deletions
@@ -11,57 +11,44 @@
 *.swp
 doc
 Meta
-
 .vscode/*
 !.vscode/settings.json
 !.vscode/tasks.json
 !.vscode/launch.json
 !.vscode/extensions.json
 *.code-workspace
-
 # Local History for Visual Studio Code
 .history/
-
 # History files
 .Rhistory
 .Rapp.history
-
 # Session Data files
 .RData
-
 # User-specific files
 .Ruserdata
-
 # Example code in package build process
 *-Ex.R
-
 # Output files from R CMD build
 /*.tar.gz
-
 # Output files from R CMD check
 /*.Rcheck/
-
 # RStudio files
 .Rproj.user/
-
 # produced vignettes
 vignettes/*.html
 vignettes/*.pdf
-
 # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
 .httr-oauth
-
 # knitr and R markdown default cache directories
 *_cache/
 /cache/
-
 # Temporary files created by R markdown
 *.utf8.md
 *.knit.md
-
 # R Environment Variables
 .Renviron
-
 # pkgdown site
 docs/
 /Meta/
+logger_file
+.DS_Store
@@ -101,7 +101,10 @@ Suggests:
     future,
     htmlwidgets,
     ranger,
-    themis
+    themis,
+    dimRed,
+    RSpectra,
+    RANN
 ByteCompile: true
 Encoding: UTF-8
 Config/testthat/edition: 3
@@ -159,6 +162,8 @@ Collate:
     'PipeOpImputeMode.R'
     'PipeOpImputeOOR.R'
     'PipeOpImputeSample.R'
+    'PipeOpInfo.R'
+    'PipeOpIsomap.R'
     'PipeOpKernelPCA.R'
     'PipeOpLearner.R'
     'PipeOpLearnerCV.R'
 
@@ -133,6 +133,8 @@ export(PipeOpImputeMedian)
 export(PipeOpImputeMode)
 export(PipeOpImputeOOR)
 export(PipeOpImputeSample)
+export(PipeOpInfo)
+export(PipeOpIsomap)
 export(PipeOpKernelPCA)
 export(PipeOpLearner)
 export(PipeOpLearnerCV)
 
@@ -6,6 +6,8 @@
 * Added support for internal validation tasks to `PipeOpFeatureUnion`.
 * feat: `PipeOpLearnerCV` can reuse the cross-validation models during prediction by averaging their outputs (`resampling.predict_method = "cv_ensemble"`).
 * feat: `PipeOpRegrAvg` gets new `se_aggr` and `se_aggr_rho` hyperparameters and now allows various forms of SE aggregation.
+* Fix: `PipeOpInfo` now prints a bounded task preview (respecting target/feature ordering and row ids) and collapses logger output to single messages.
+* Fix: `PipeOpIsomap` only operates on numeric or integer features and its parameter documentation was corrected.
 * Fix: `PipeOpRemoveConstants` now avoids integer overflow when evaluating relative tolerances for near-`integer.max` data.
 * Compatibility with new testthat version 3.3.0
 
 
@@ -730,6 +730,7 @@ graph_load_namespaces = function(self, info) {
 
 
 #' @export
+#' @method predict Graph
 predict.Graph = function(object, newdata, ...) {
   if (!object$is_trained) {
     stop("Cannot predict, Graph has not been trained yet")
 
@@ -0,0 +1,167 @@
+#' @title Customizable Information Printer
+#'
+#' @usage NULL
+#' @name mlr_pipeops_info
+#' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOp`]
+#'
+#' @description
+#' `PipeOpInfo` prints its input to the console or a logger in a customizable way.
+#' Users can define how specific object classes should be displayed using custom printer functions.
+#'
+#' @section Construction:
+#' ```
+#' PipeOpInfo$new(id = "info", printer = NULL, collect_multiplicity = FALSE, log_target = "lgr::mlr3/mlr3pipelines::info", param_vals = list())
+#' ```
+#' * `id` :: `character(1)`\cr
+#'   Identifier of resulting object, default "info"
+#' * `printer` :: `list` \cr
+#'   Optional mapping from object classes to printer functions. Custom functions override default printer-functions.
+#' * `collect_multiplicity` :: `logical(1)`\cr
+#'   If `TRUE`, the input is a [`Multiplicity`] collecting channel. [`Multiplicity`] input/output is accepted and the members are aggregated.
+#' * `log_target` :: `character(1)`\cr
+#'   Specifies where the information about the input object is printed. By default it is
+#'   directed to a logger, which is addressed in the form
+#'   `lgr::<logger name>::<log level>`. Alternatively, it can be set to
+#'   `"message"`, `"warning"` or `"cat"`. When set to `"none"`, no
+#'   information about the object is printed.
+#'
+#' @section Input and Output Channels:
+#' `PipeOpInfo` has one input channel called "input", it can take any type of input (`*`).
+#' `PipeOpInfo` has one output channel called "output", it can take any type of output (`*`).
+#'
+#' @section State:
+#' The `$state` is left empty (`list()`).
+#'
+#' @section Internals:
+#' `PipeOpInfo` forwards its input unchanged, but prints information about it
+#' depending on the `printer` and `log_target` settings.
+#'
+#' @section Fields:
+#' Fields inherited from `PipeOp`, as well as:
+#' * `printer` :: `list`\cr
+#'   Mapping of object classes to printer functions. Includes printer-specifications for `Task`, `Prediction`, `NULL`. Otherwise object is printed as is.
+#' * `log_target` :: `character(1)` \cr
+#'   Specifies current output target.
+#'
+#' @section Methods:
+#' Only methods inherited from [`PipeOp`].
+#'
+#' @examples
+#' library("mlr3")
+#'
+#' poinfo = po("info")
+#' poinfo$train(list(tsk("mtcars")))
+#' poinfo$predict(list(tsk("mtcars")))
+#'
+#' # Specify customized console output for Task-objects
+#' poinfo = po("info", log_target = "cat",
+#'   printer = list(Task = function(x) list(head_data = head(x$data()), nrow = nrow(x$data())))
+#' )
+#'
+#' poinfo$train(list(tsk("iris")))
+#' poinfo$predict(list(tsk("iris")))
+#'
+#' @family PipeOps
+#' @template seealso_pipeopslist
+#' @include PipeOp.R
+#' @export
+#'
+#'
+
+PipeOpInfo = R6Class("PipeOpInfo",
+  inherit = PipeOp,
+  public = list(
+    # Sets up one pass-through input/output channel and stores the printer mapping and output target.
+    # * `printer`: optional named list of functions keyed by class name; entries override the built-in printers.
+    # * `collect_multiplicity`: if `TRUE`, the channel type is `[*]` instead of `*`.
+    # * `log_target`: "cat", "none", "warning", "message" or "lgr::<logger name>::<log level>".
+    initialize = function(id = "info", printer = NULL, collect_multiplicity = FALSE, log_target = "lgr::mlr3/mlr3pipelines::info", param_vals = list()) {
+      # Validate eagerly so that a malformed printer mapping fails at construction
+      # instead of deep inside capture.output() during training.
+      assertList(printer, types = "function", names = "unique", null.ok = TRUE)
+      assertFlag(collect_multiplicity)
+      assertString(log_target, pattern = "^(cat|none|warning|message|lgr::[^:]+::[^:]+)$")
+      inouttype = "*"
+      if (collect_multiplicity) {
+        inouttype = sprintf("[%s]", inouttype)
+      }
+      super$initialize(id, param_vals = param_vals,
+        input = data.table(name = "input", train = inouttype, predict = inouttype),
+        output = data.table(name = "output", train = inouttype, predict = inouttype)
+        #tag = "debug"
+      )
+      # Built-in printers; each returns the object that is subsequently printed.
+      original_printer = list(
+        # Task: the task itself plus a preview bounded to 10 rows and 10 columns (targets first).
+        Task = crate(function(x) {
+          row_preview = head(x$row_ids, 10L)
+          col_preview = head(c(x$target_names, x$feature_names), 10L)
+          data_preview = x$data(rows = row_preview, cols = col_preview)
+          list(
+            task = x,
+            data_preview = data_preview
+          )
+        }),
+        # Prediction: include the score if it can be computed, otherwise only the prediction.
+        Prediction = crate(function(x) {
+          tryCatch(list(prediction = x, score = x$score()), error = function(e) {list(prediction = x)})
+        }),
+        `NULL` = crate(function(x) "NULL"),
+        default = crate(function(x) x)
+      )
+      # User-supplied printers take precedence over the built-in ones of the same name.
+      private$.printer = insert_named(original_printer, printer)
+      private$.log_target = log_target
+    }
+  ),
+  active = list(
+    printer = function(rhs) {
+      if (!missing(rhs)) stop("printer is read only.")
+      private$.printer
+    },
+    log_target = function(rhs) {
+      if (!missing(rhs)) stop("log_target is read only.")
+      private$.log_target
+    }
+  ),
+  private = list(
+    .printer = NULL,
+    .log_target = NULL,
+    # Renders `inputs[[1]]` with the printer matching its class and emits the text to the configured target.
+    # `stage` is a label ("Training" / "Prediction") used in the header line.
+    .output = function(inputs, stage) {
+      # Nothing is emitted for "none", so do not run the (possibly expensive) printer at all.
+      if (private$.log_target == "none") {
+        return(invisible(NULL))
+      }
+      # Use the printer of the first (most specific) class that has one, else the default printer.
+      input_class = class(inputs[[1]])
+      matched_class = input_class[input_class %in% names(private$.printer)]
+      printer_name = if (length(matched_class)) matched_class[[1]] else "default"
+      specific_printer = private$.printer[[printer_name]]
+      # Only an error when no printer could be resolved at all.
+      if (is.null(specific_printer)) {
+        stop("Object-class was not found and no default printer is available.")
+      }
+      log_target_split = strsplit(private$.log_target, "::")[[1]]
+      stage_string = sprintf("Object passing through PipeOp %s - %s", self$id, stage)
+      # capture.output() prints the visible value of the block, i.e. the printer's return value.
+      print_string = utils::capture.output({
+        cat(stage_string, "\n\n")
+        specific_printer(inputs[[1]])
+      })
+      # Collapse to one string so that each object results in a single message / log event.
+      message_text = paste(print_string, collapse = "\n")
+      if (log_target_split[[1]] == "lgr") {
+        # Address format is "lgr::<logger name>::<log level>".
+        logger = lgr::get_logger(log_target_split[[2]])
+        log_level = log_target_split[[3]]
+        logger$log(log_level, msg = message_text)
+      } else if (private$.log_target == "cat") {
+        # Terminate the block with a newline so following console output starts on a fresh line.
+        cat(message_text, "\n", sep = "")
+      } else if (private$.log_target == "message") {
+        message(message_text)
+      } else if (private$.log_target == "warning") {
+        # call. = FALSE: the internal call frame is of no use to the user.
+        warning(message_text, call. = FALSE)
+      } else {
+        stopf("Invalid log_target '%s'.", private$.log_target)
+      }
+      invisible(NULL)
+    },
+    # Both stages forward `inputs` unchanged after emitting the information.
+    .train = function(inputs, stage = "Training") {
+      self$state = list()
+      private$.output(inputs, stage)
+      inputs
+    },
+    .predict = function(inputs, stage = "Prediction") {
+      private$.output(inputs, stage)
+      inputs
+    },
+    .additional_phash_input = function() {
+      list(printer = self$printer, log_target = self$log_target)
+    }
+  )
+)
+
+mlr_pipeops$add("info", PipeOpInfo)
@@ -0,0 +1,97 @@
+#' @title Algorithm for Dimensionality Reduction
+#'
+#' @usage NULL
+#' @name mlr_pipeops_isomap
+#' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreproc`]
+#'
+#' @description
+#' Reduces the dimensionality of the data of the input [`Task`][mlr3::Task] using the
+#' Isomap algorithm from the `dimRed`-package, preserving geodesic distances
+#' between observations. The number of neighbors (`knn`) and embedding
+#' dimensions (`ndim`) control the transformation.
+#'
+#'
+#' @section Construction:
+#' ```
+#' PipeOpIsomap$new(id = "isomap", param_vals = list())
+#' ```
+#'
+#' * `id` :: `character(1)`\cr
+#'   Identifier of resulting object, default `"isomap"`
+#' * `param_vals` :: named `list`\cr
+#'   List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`.
+#'
+#' @section Input and Output Channels:
+#' Input and output channels are inherited from [`PipeOpTaskPreproc`].
+#'
+#' The output is the input [`Task`][mlr3::Task] with the data projected to the lower-dimensional space.
+#'
+#' @section State:
+#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`], as well as:
+#' * `embed_result` :: `dimRedResult`\cr
+#'   The resulting object after applying the "Isomap"-method from the `dimRed`-package to the data.
+#'
+#' @section Parameters:
+#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as:
+#' * `knn` :: `integer(1)`\cr
+#'   The number of nearest neighbors in the graph.
+#'   Default is `50`.
+#' * `ndim` :: `integer(1)`\cr
+#'   The number of embedding dimensions.
+#'   Default is `2`.
+#' * `get_geod` :: `logical(1)`\cr
+#'   Determines whether the distance matrix should be kept in the `$state`.
+#'   Default is `FALSE`.
+#' * `.mute` :: `character`\cr
+#'   A character vector of elements to mute during training (e.g. c("message", "output")).
+#'   Initialized to `NULL`.
+#'
+#' @section Internals:
+#' Applies the Isomap embedding from the `dimRed`-package.
+#'
+#' @section Fields:
+#' Only fields inherited from [`PipeOp`].
+#'
+#' @section Methods:
+#' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`].
+#'
+#' @examplesIf requireNamespace("dimRed")
+#' library("mlr3")
+#' po = po("isomap", .mute = c("message", "output"))
+#' po$train(list(tsk("iris")))[[1]]$data()
+#' po$predict(list(tsk("iris")))[[1]]$data()
+#'
+#'
+#' @family PipeOps
+#' @template seealso_pipeopslist
+#' @include PipeOpTaskPreproc.R
+#' @export
+#'
+
+PipeOpIsomap = R6Class("PipeOpIsomap",
+  inherit = PipeOpTaskPreproc,
+  public = list(
+    # Declares the hyperparameters and restricts the operator to numeric / integer features.
+    initialize = function(id = "isomap", param_vals = list()) {
+      # Every hyperparameter carries the "isomap" tag, which is used during
+      # training to select the values forwarded to dimRed::embed().
+      isomap_tags = c("train", "isomap")
+      param_set = ps(
+        knn = p_int(default = 50, lower = 1, upper = Inf, tags = isomap_tags),
+        ndim = p_int(default = 2, lower = 1, upper = Inf, tags = isomap_tags),
+        get_geod = p_lgl(default = FALSE, tags = isomap_tags),
+        .mute = p_uty(init = NULL, tags = isomap_tags)
+      )
+      super$initialize(
+        id = id,
+        param_set = param_set,
+        param_vals = param_vals,
+        packages = c("dimRed", "stats"),
+        feature_types = c("numeric", "integer")
+      )
+    }
+  ),
+  private = list(
+    # Fits the embedding on `dt`, stores the fit in `$state$embed_result`
+    # and returns the embedded training data.
+    .train_dt = function(dt, levels, target) {
+      isomap_args = self$param_set$get_values(tags = "isomap")
+      fit = mlr3misc::invoke(
+        .f = dimRed::embed,
+        .data = dt,
+        .method = "Isomap",
+        .args = isomap_args
+      )
+      self$state = list(embed_result = fit)
+      fit@data@data
+    },
+    # Projects new data using the embedding stored during training.
+    .predict_dt = function(dt, levels) {
+      newdata = as.data.frame(dt)
+      projected = dimRed::predict(self$state$embed_result, newdata)
+      projected@data
+    }
+  )
+)
+
+mlr_pipeops$add("isomap", PipeOpIsomap)
@@ -15,7 +15,7 @@ register_mlr3 = function() {
   x$pipeops$valid_tags = unique(c(x$pipeops$valid_tags,
     c("abstract", "meta", "missings", "feature selection", "imbalanced data",
     "data transform", "target transform", "ensemble", "robustify", "learner", "encode",
-     "multiplicity")))
+     "multiplicity", "debug")))
   x$pipeops$properties = c("validation", "internal_tuning")
 }
Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,7 @@ register_mlr3 = function() {`
`15`	`15`	`x$pipeops$valid_tags = unique(c(x$pipeops$valid_tags,`
`16`	`16`	`c("abstract", "meta", "missings", "feature selection", "imbalanced data",`
`17`	`17`	`"data transform", "target transform", "ensemble", "robustify", "learner", "encode",`
`18`		`- "multiplicity")))`
	`18`	`+ "multiplicity", "debug")))`
`19`	`19`	`x$pipeops$properties = c("validation", "internal_tuning")`
`20`	`20`	`}`
`21`	`21`