Skip to content

Commit 8dd971f

Browse files
authored
Merge pull request #945 from mlr-org/hiwibranch
PipeOpInfo, PipeOpIsomap
2 parents 1151d89 + 16d65d9 commit 8dd971f

File tree

96 files changed

+1183
-22
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

96 files changed

+1183
-22
lines changed

.gitignore

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,57 +11,44 @@
1111
*.swp
1212
doc
1313
Meta
14-
1514
.vscode/*
1615
!.vscode/settings.json
1716
!.vscode/tasks.json
1817
!.vscode/launch.json
1918
!.vscode/extensions.json
2019
*.code-workspace
21-
2220
# Local History for Visual Studio Code
2321
.history/
24-
2522
# History files
2623
.Rhistory
2724
.Rapp.history
28-
2925
# Session Data files
3026
.RData
31-
3227
# User-specific files
3328
.Ruserdata
34-
3529
# Example code in package build process
3630
*-Ex.R
37-
3831
# Output files from R CMD build
3932
/*.tar.gz
40-
4133
# Output files from R CMD check
4234
/*.Rcheck/
43-
4435
# RStudio files
4536
.Rproj.user/
46-
4737
# produced vignettes
4838
vignettes/*.html
4939
vignettes/*.pdf
50-
5140
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
5241
.httr-oauth
53-
5442
# knitr and R markdown default cache directories
5543
*_cache/
5644
/cache/
57-
5845
# Temporary files created by R markdown
5946
*.utf8.md
6047
*.knit.md
61-
6248
# R Environment Variables
6349
.Renviron
64-
6550
# pkgdown site
6651
docs/
6752
/Meta/
53+
logger_file
54+
.DS_Store

DESCRIPTION

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,10 @@ Suggests:
101101
future,
102102
htmlwidgets,
103103
ranger,
104-
themis
104+
themis,
105+
dimRed,
106+
RSpectra,
107+
RANN
105108
ByteCompile: true
106109
Encoding: UTF-8
107110
Config/testthat/edition: 3
@@ -159,6 +162,8 @@ Collate:
159162
'PipeOpImputeMode.R'
160163
'PipeOpImputeOOR.R'
161164
'PipeOpImputeSample.R'
165+
'PipeOpInfo.R'
166+
'PipeOpIsomap.R'
162167
'PipeOpKernelPCA.R'
163168
'PipeOpLearner.R'
164169
'PipeOpLearnerCV.R'

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ export(PipeOpImputeMedian)
133133
export(PipeOpImputeMode)
134134
export(PipeOpImputeOOR)
135135
export(PipeOpImputeSample)
136+
export(PipeOpInfo)
137+
export(PipeOpIsomap)
136138
export(PipeOpKernelPCA)
137139
export(PipeOpLearner)
138140
export(PipeOpLearnerCV)

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
* Added support for internal validation tasks to `PipeOpFeatureUnion`.
77
* feat: `PipeOpLearnerCV` can reuse the cross-validation models during prediction by averaging their outputs (`resampling.predict_method = "cv_ensemble"`).
88
* feat: `PipeOpRegrAvg` gets new `se_aggr` and `se_aggr_rho` hyperparameters and now allows various forms of SE aggregation.
9+
* Fix: `PipeOpInfo` now prints a bounded task preview (respecting target/feature ordering and row ids) and collapses logger output to single messages.
10+
* Fix: `PipeOpIsomap` only operates on numeric or integer features and its parameter documentation was corrected.
911
* Fix: `PipeOpRemoveConstants` now avoids integer overflow when evaluating relative tolerances for near-`integer.max` data.
1012
* Compatibility with new testthat version 3.3.0
1113

R/Graph.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,7 @@ graph_load_namespaces = function(self, info) {
730730

731731

732732
#' @export
733+
#' @method predict Graph
733734
predict.Graph = function(object, newdata, ...) {
734735
if (!object$is_trained) {
735736
stop("Cannot predict, Graph has not been trained yet")

R/PipeOpInfo.R

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
#' @title Customizable Information Printer
2+
#'
3+
#' @usage NULL
4+
#' @name mlr_pipeops_info
5+
#' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOp`]
6+
#'
7+
#' @description
8+
#' `PipeOpInfo` prints its input to the console or a logger in a customizable way.
9+
#' Users can define how specific object classes should be displayed using custom printer functions.
10+
#'
11+
#' @section Construction:
12+
#' ```
13+
#' PipeOpInfo$new(id = "info", printer = NULL, collect_multiplicity = FALSE, log_target = "lgr::mlr3/mlr3pipelines::info", param_vals = list())
14+
#' ```
15+
#' * `id` :: `character(1)`\cr
16+
#' Identifier of resulting object, default `"info"`.
17+
#' * `printer` :: `list` \cr
18+
#' Optional mapping from object classes to printer functions. Custom functions override default printer-functions.
19+
#' * `collect_multiplicity` :: `logical(1)`\cr
20+
#' If `TRUE`, the input is a [`Multiplicity`] collecting channel. [`Multiplicity`] input/output is accepted and the members are aggregated.
21+
#' * `log_target` :: `character(1)`\cr
22+
#' Specifies how the input object is printed to the console. By default it is
23+
#' directed to a logger, whose address can be customized using the form
24+
#' `lgr::<logger name>::<log level>`. Otherwise it can be printed
25+
#' as "message", "warning" or "cat". When set to "none", no customized
26+
#' information about the object will be printed.
27+
#'
28+
#' @section Input and Output Channels:
29+
#' `PipeOpInfo` has one input channel called `"input"` that accepts any type of input (`*`).
30+
#' `PipeOpInfo` has one output channel called `"output"` that passes the input on unchanged (`*`).
31+
#'
32+
#' @section State:
33+
#' The `$state` is left empty (`list()`).
34+
#'
35+
#' @section Internals:
36+
#' `PipeOpInfo` forwards its input unchanged, but prints information about it
37+
#' depending on the `printer` and `log_target` settings.
38+
#'
39+
#' @section Fields:
40+
#' Fields inherited from `PipeOp`, as well as:
41+
#' * `printer` :: `list`\cr
42+
#' Mapping of object classes to printer functions. Includes printer-specifications for `Task`, `Prediction`, `NULL`. Otherwise object is printed as is.
43+
#' * `log_target` :: `character(1)` \cr
44+
#' Specifies current output target.
45+
#'
46+
#' @section Methods:
47+
#' Only methods inherited from [`PipeOp`].
48+
#'
49+
#' @examples
50+
#' library("mlr3")
51+
#'
52+
#' poinfo = po("info")
53+
#' poinfo$train(list(tsk("mtcars")))
54+
#' poinfo$predict(list(tsk("mtcars")))
55+
#'
56+
#' # Specify customized console output for Task-objects
57+
#' poinfo = po("info", log_target = "cat",
58+
#' printer = list(Task = function(x) list(head_data = head(x$data()), nrow = nrow(x$data())))
59+
#' )
60+
#'
61+
#' poinfo$train(list(tsk("iris")))
62+
#' poinfo$predict(list(tsk("iris")))
63+
#'
64+
#' @family PipeOps
65+
#' @template seealso_pipeopslist
66+
#' @include PipeOp.R
67+
#' @export
68+
#'
69+
#'
70+
71+
PipeOpInfo = R6Class("PipeOpInfo",
  inherit = PipeOp,
  public = list(
    # Set up one pass-through input/output channel pair and store the printer
    # mapping and the log target.
    #
    # id: identifier of the PipeOp.
    # printer: optional named list of functions keyed by class name (or
    #   "default"); entries replace built-in printers of the same name.
    # collect_multiplicity: if TRUE the channels are declared as "[*]"
    #   instead of "*".
    # log_target: "cat", "none", "warning", "message", or
    #   "lgr::<logger name>::<log level>".
    # param_vals: forwarded to the PipeOp initializer.
    initialize = function(id = "info", printer = NULL, collect_multiplicity = FALSE, log_target = "lgr::mlr3/mlr3pipelines::info", param_vals = list()) {
      assertString(log_target, pattern = "^(cat|none|warning|message|lgr::[^:]+::[^:]+)$")
      # Validate eagerly: a malformed `printer` would otherwise only surface
      # during training as "attempt to apply non-function".
      assertFlag(collect_multiplicity)
      assertList(printer, types = "function", names = "unique", null.ok = TRUE)
      inouttype = "*"
      if (collect_multiplicity) {
        inouttype = sprintf("[%s]", inouttype)
      }
      super$initialize(id, param_vals = param_vals,
        input = data.table(name = "input", train = inouttype, predict = inouttype),
        output = data.table(name = "output", train = inouttype, predict = inouttype)
        #tag = "debug"
      )
      # Built-in printers; user-supplied entries of the same name take precedence.
      original_printer = list(
        # Task: the task itself plus a preview of at most 10 rows and
        # 10 columns, with target columns listed before features.
        Task = crate(function(x) {
          row_preview = head(x$row_ids, 10L)
          col_preview = head(c(x$target_names, x$feature_names), 10L)
          data_preview = x$data(rows = row_preview, cols = col_preview)
          list(
            task = x,
            data_preview = data_preview
          )
        }),
        # Prediction: the prediction plus its score; if scoring raises an
        # error, only the prediction is shown.
        Prediction = crate(function(x) {
          tryCatch(list(prediction = x, score = x$score()), error = function(e) {list(prediction = x)})
        }),
        `NULL` = crate(function(x) "NULL"),
        # Fallback for every class without a dedicated printer.
        default = crate(function(x) x)
      )
      private$.printer = insert_named(original_printer, printer)
      private$.log_target = log_target
    }
  ),
  active = list(
    # Read-only access to the class -> printer mapping.
    printer = function(rhs) {
      if (!missing(rhs)) stop("printer is read only.")
      private$.printer
    },
    # Read-only access to the configured output target.
    log_target = function(rhs) {
      if (!missing(rhs)) stop("log_target is read only.")
      private$.log_target
    }
  ),
  private = list(
    .printer = NULL,
    .log_target = NULL,
    # Render the first input with the matching printer and emit the text to
    # the configured log target.
    #
    # inputs: list of channel inputs; only `inputs[[1]]` is inspected.
    # stage: label included in the header line.
    .output = function(inputs, stage) {
      # "none" means no output at all, so skip the (possibly expensive)
      # printer call, e.g. scoring a Prediction, entirely.
      if (private$.log_target == "none") {
        return(invisible(NULL))
      }
      # Use the printer of the first (most specific) class of the object that
      # has one; fall back to "default" otherwise.
      input_class = class(inputs[[1]])
      known_classes = input_class[input_class %in% names(private$.printer)]
      printer_name = if (length(known_classes)) known_classes[[1]] else "default"
      specific_printer = private$.printer[[printer_name]]
      # Only fail when the printer that is actually needed is unavailable.
      if (is.null(specific_printer)) {
        stop("Object-class was not found and no default printer is available.")
      }
      stage_string = sprintf("Object passing through PipeOp %s - %s", self$id, stage)
      # capture.output() prints the visible value of the braced expression, so
      # the printer's return value ends up in the captured lines.
      print_string = utils::capture.output({
        cat(stage_string, "\n\n")
        specific_printer(inputs[[1]])
      })
      # Collapse to one string so every target receives a single message.
      message_text = paste(print_string, collapse = "\n")
      log_target_split = strsplit(private$.log_target, "::", fixed = TRUE)[[1]]
      switch(log_target_split[[1]],
        lgr = {
          # "lgr::<logger name>::<log level>"
          logger = lgr::get_logger(log_target_split[[2]])
          logger$log(log_target_split[[3]], msg = message_text)
        },
        # Terminate with a newline so that later console output does not
        # continue on the last printed line.
        cat = cat(message_text, "\n", sep = ""),
        message = message(message_text),
        warning = warning(message_text),
        stopf("Invalid log_target '%s'.", private$.log_target)
      )
    },
    # Training: reset the state to an empty list, print, and pass inputs on.
    .train = function(inputs, stage = "Training") {
      self$state = list()
      private$.output(inputs, stage)
      inputs
    },
    # Prediction: print and pass inputs on unchanged.
    .predict = function(inputs, stage = "Prediction") {
      private$.output(inputs, stage)
      inputs
    },
    # Printer mapping and log target are part of the PipeOp's hash input.
    .additional_phash_input = function() {
      list(printer = self$printer, log_target = self$log_target)
    }
  )
)

mlr_pipeops$add("info", PipeOpInfo)

R/PipeOpIsomap.R

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#' @title Isomap Embedding for Dimensionality Reduction
2+
#'
3+
#' @usage NULL
4+
#' @name mlr_pipeops_isomap
5+
#' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreproc`]
6+
#'
7+
#' @description
8+
#' Reduces the dimensionality of the data of the input [`Task`][mlr3::Task] using the
9+
#' Isomap algorithm from the `dimRed`-package, preserving geodesic distances
10+
#' between observations. The number of neighbors (`knn`) and embedding
11+
#' dimensions (`ndim`) control the transformation.
12+
#'
13+
#'
14+
#' @section Construction:
15+
#' ```
16+
#' PipeOpIsomap$new(id = "isomap", ...)
17+
#' ```
18+
#'
19+
#' * `id` :: `character(1)`\cr
20+
#' Identifier of resulting object, default `"isomap"`
21+
#' * `param_vals` :: named `list`\cr
22+
#' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`.
23+
#'
24+
#' @section Input and Output Channels:
25+
#' Input and output channels are inherited from [`PipeOpTaskPreproc`].
26+
#'
27+
#' The output is the input [`Task`][mlr3::Task] with the data projected to the lower-dimensional space.
28+
#'
29+
#' @section State:
30+
#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`], as well as:
31+
#' * `embed_result` :: `dimRedResult`\cr
32+
#' The resulting object after applying the "Isomap"-method from the `dimRed`-package to the data.
33+
#'
34+
#' @section Parameters:
35+
#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as:
36+
#' * `knn` :: `integer(1)`\cr
37+
#' The number of nearest neighbors in the graph.
38+
#' Default is 50.
39+
#' * `ndim` :: `integer(1)`\cr
40+
#' The number of embedding dimensions.
41+
#' Default is 2.
42+
#' * `get_geod` :: `logical(1)`\cr
43+
#' Determines whether the distance matrix should be kept in the `$state`.
44+
#' Default is `FALSE`.
45+
#' * `.mute` :: `character`\cr
46+
#' A character vector of elements to mute during training (e.g. `c("message", "output")`).
47+
#' Initialized to `NULL`.
48+
#'
49+
#' @section Internals:
50+
#' Applies the Isomap embedding from the `dimRed`-package.
51+
#'
52+
#' @section Fields:
53+
#' Only fields inherited from [`PipeOp`].
54+
#'
55+
#' @section Methods:
56+
#' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`].
57+
#'
58+
#' @examplesIf requireNamespace("dimRed")
59+
#' library("mlr3")
60+
#' po = po("isomap", .mute = c("message", "output"))
61+
#' po$train(list(tsk("iris")))[[1]]$data()
62+
#' po$predict(list(tsk("iris")))[[1]]$data()
63+
#'
64+
#'
65+
#' @family PipeOps
66+
#' @template seealso_pipeopslist
67+
#' @include PipeOpTaskPreproc.R
68+
#' @export
69+
#'
70+
71+
PipeOpIsomap = R6Class("PipeOpIsomap",
  inherit = PipeOpTaskPreproc,
  public = list(
    # Build the hyperparameter set and register the PipeOp as operating on
    # numeric and integer features only.
    #
    # id: identifier of the PipeOp.
    # param_vals: hyperparameter values forwarded to the parent initializer.
    initialize = function(id = "isomap", param_vals = list()) {
      # Every hyperparameter carries the "isomap" tag, which is what
      # `.train_dt()` uses to collect the arguments for the embedding call.
      isomap_tags = c("train", "isomap")
      # `knn`, `ndim` and `get_geod` are declared with `default =`;
      # `.mute` is declared with `init = NULL`.
      param_set = ps(
        knn = p_int(lower = 1, upper = Inf, default = 50, tags = isomap_tags),
        ndim = p_int(lower = 1, upper = Inf, default = 2, tags = isomap_tags),
        get_geod = p_lgl(default = FALSE, tags = isomap_tags),
        .mute = p_uty(init = NULL, tags = isomap_tags)
      )
      super$initialize(
        id = id,
        param_set = param_set,
        param_vals = param_vals,
        packages = c("dimRed", "stats"),
        feature_types = c("numeric", "integer")
      )
    }
  ),
  private = list(
    # Fit the Isomap embedding on `dt`, keep the fitted object in the state
    # and return the embedded training data.
    .train_dt = function(dt, levels, target) {
      isomap_args = self$param_set$get_values(tags = "isomap")
      fit = mlr3misc::invoke(
        .f = dimRed::embed,
        .data = dt,
        .method = "Isomap",
        .args = isomap_args
      )
      self$state = list(embed_result = fit)
      fit@data@data
    },
    # Project new data with the embedding stored during training.
    .predict_dt = function(dt, levels) {
      projected = dimRed::predict(self$state$embed_result, as.data.frame(dt))
      projected@data
    }
  )
)

mlr_pipeops$add("isomap", PipeOpIsomap)

R/zzz.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ register_mlr3 = function() {
1515
x$pipeops$valid_tags = unique(c(x$pipeops$valid_tags,
1616
c("abstract", "meta", "missings", "feature selection", "imbalanced data",
1717
"data transform", "target transform", "ensemble", "robustify", "learner", "encode",
18-
"multiplicity")))
18+
"multiplicity", "debug")))
1919
x$pipeops$properties = c("validation", "internal_tuning")
2020
}
2121

man/PipeOp.Rd

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/PipeOpEncodePL.Rd

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)