Skip to content

Commit b72b9d2

Browse files
authored
Better use of preload data (#381)
1 parent 40af4dd commit b72b9d2

9 files changed

+39
-53
lines changed

R/commonMachineLearningClassification.R

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,7 @@
4242
}
4343

4444
.mlClassificationReadData <- function(dataset, options) {
45-
if (is.null(dataset)) {
46-
dataset <- .readDataClassificationRegressionAnalyses(dataset, options)
47-
}
48-
if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) {
49-
dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE])
50-
}
45+
dataset <- .readDataClassificationRegressionAnalyses(dataset, options, include_weights = FALSE)
5146
if (options[["target"]] != "") {
5247
dataset[, options[["target"]]] <- factor(dataset[, options[["target"]]], ordered = FALSE)
5348
}

R/commonMachineLearningClustering.R

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,7 @@
3333
.mlClusteringReadData <- function(dataset, options) {
3434
predictors <- unlist(options[["predictors"]])
3535
predictors <- predictors[predictors != ""]
36-
if (is.null(dataset)) {
37-
dataset <- .readAndAddCompleteRowIndices(options, "predictors")
38-
}
36+
dataset <- jaspBase::excludeNaListwise(dataset, predictors)
3937
if (options[["scaleVariables"]] && length(unlist(options[["predictors"]])) > 0) {
4038
dataset <- .scaleNumericData(dataset)
4139
}

R/commonMachineLearningRegression.R

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -44,23 +44,39 @@
4444
return(opt)
4545
}
4646

47-
.readDataRegressionAnalyses <- function(dataset, options, jaspResults) {
48-
if (is.null(dataset)) {
49-
dataset <- .readDataClassificationRegressionAnalyses(dataset, options)
50-
}
51-
if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) {
52-
dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE])
53-
}
47+
.readDataRegressionAnalyses <- function(dataset, options, jaspResults, include_weights = FALSE) {
48+
dataset <- .readDataClassificationRegressionAnalyses(dataset, options, include_weights)
5449
return(dataset)
5550
}
5651

57-
.readDataClassificationRegressionAnalyses <- function(dataset, options) {
52+
.readDataClassificationRegressionAnalyses <- function(dataset, options, include_weights) {
5853

54+
target <- NULL
55+
weights <- NULL
5956
testSetIndicator <- NULL
60-
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator")
61-
testSetIndicator <- "testSetIndicatorVariable"
57+
if (options[["target"]] != "") {
58+
target <- options[["target"]]
59+
}
60+
if (include_weights && options[["weights"]] != "") {
61+
weights <- options[["weights"]]
62+
}
63+
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") {
64+
testSetIndicator <- options[["testSetIndicatorVariable"]]
65+
}
66+
67+
predictors <- unlist(options["predictors"])
68+
predictors <- predictors[predictors != ""]
69+
dataset <- jaspBase::excludeNaListwise(dataset, c(target, predictors, weights, testSetIndicator))
6270

63-
return(.readAndAddCompleteRowIndices(options, c("target", "predictors"), testSetIndicator))
71+
# Scale numeric predictors
72+
if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) {
73+
dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE])
74+
}
75+
# Make sure the test set indicator is numeric
76+
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator")
77+
dataset[[options[["testSetIndicatorVariable"]]]] <- as.numeric(dataset[[options[["testSetIndicatorVariable"]]]])
78+
79+
return(dataset)
6480
}
6581

6682
.readAndAddCompleteRowIndices <- function(options, optionNames = NULL, optionNamesAsNumeric = NULL) {
@@ -72,9 +88,7 @@
7288
options[[name2]] <- rep("scale", length(options[[name]]))
7389
}
7490
dataset <- jaspBase::readDataSetByVariableTypes(options, c(optionNames, optionNamesAsNumeric))
75-
complete.index <- which(complete.cases(dataset))
76-
dataset <- na.omit(dataset)
77-
rownames(dataset) <- as.character(complete.index)
91+
dataset <- jaspBase::excludeNaListwise(dataset, c(options[["target"]], options[["predictors"]]))
7892
return(dataset)
7993
}
8094

R/mlPrediction.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ is.jaspMachineLearning <- function(x) {
422422
selection <- predictions[indexes]
423423
cols <- list(row = indexes, pred = selection)
424424
if (options[["predictionsTableFeatures"]]) {
425-
for (i in model[["jaspVars"]][["encoded"]]$predictors) {
425+
for (i in colnames(dataset)) {
426426
if (.columnIsNominal(i)) {
427427
table$addColumnInfo(name = i, title = i, type = "string")
428428
var <- levels(dataset[[i]])[dataset[[i]]]

R/mlRegressionLinear.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
mlRegressionLinear <- function(jaspResults, dataset, options, ...) {
1919

2020
# Preparatory work
21-
dataset <- .mlRegressionRegularizedReadData(dataset, options)
21+
dataset <- .readDataRegressionAnalyses(dataset, options, include_weights = TRUE)
2222
.mlRegressionErrorHandling(dataset, options, type = "lm")
2323

2424
# Check if analysis is ready to run

R/mlRegressionRegularized.R

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
mlRegressionRegularized <- function(jaspResults, dataset, options, ...) {
1919

2020
# Preparatory work
21-
dataset <- .mlRegressionRegularizedReadData(dataset, options)
21+
dataset <- .readDataRegressionAnalyses(dataset, options, include_weights = TRUE)
2222
.mlRegressionErrorHandling(dataset, options, type = "regularized")
2323

2424
# Check if analysis is ready to run
@@ -58,31 +58,6 @@ mlRegressionRegularized <- function(jaspResults, dataset, options, ...) {
5858
.mlRegressionRegularizedPlotLambda(options, jaspResults, ready, position = 10)
5959
}
6060

61-
# Read dataset
62-
.mlRegressionRegularizedReadData <- function(dataset, options) {
63-
target <- NULL
64-
weights <- NULL
65-
testSetIndicator <- NULL
66-
if (options[["target"]] != "") {
67-
target <- options[["target"]]
68-
}
69-
if (options[["weights"]] != "") {
70-
weights <- options[["weights"]]
71-
}
72-
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator")
73-
testSetIndicator <- "testSetIndicatorVariable"
74-
75-
predictors <- unlist(options["predictors"])
76-
predictors <- predictors[predictors != ""]
77-
if (is.null(dataset)) {
78-
dataset <- .readAndAddCompleteRowIndices(options, c("target", "predictors", "weights"), testSetIndicator)
79-
}
80-
if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) {
81-
dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE])
82-
}
83-
return(dataset)
84-
}
85-
8661
.regularizedRegression <- function(dataset, options, jaspResults) {
8762
# Set model-specific parameters
8863
alpha <- switch(options[["penalty"]],

inst/help/mlPrediction.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
Prediction
22
===
33

4+
The prediction analysis enables you to load a trained machine learning model and apply it to new data. It is important that the features in the new dataset have the same names as in the original dataset used for training.
5+
46
### Input
57

68
#### Trained Model

inst/help/mlPrediction_nl.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
Voorspellen
22
===
33

4+
Met de voorspellingsanalyse kun je een getraind machine-learningmodel laden en toepassen op nieuwe gegevens. Het is belangrijk dat de kenmerken in de nieuwe dataset dezelfde namen hebben als in de oorspronkelijke dataset die voor de training is gebruikt.
5+
46
### Invoer
57

68
#### Getraind model

inst/qml/mlPrediction.qml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import "./common/tables" as TAB
2626

2727
Form
2828
{
29-
info: qsTr("The prediction analysis enables you to load a trained machine learning model and apply it to new data.")
29+
info: qsTr("The prediction analysis enables you to load a trained machine learning model and apply it to new data. It is important that the features in the new dataset have the same names as in the original dataset used for training.")
3030

3131
FileSelector
3232
{
@@ -53,7 +53,7 @@ Form
5353
id: predictors
5454
name: "predictors"
5555
title: qsTr("Features")
56-
allowedColumns: ["scale", "ordinal", "nominal"]
56+
allowedColumns: ["scale", "nominal"]
5757
allowAnalysisOwnComputedColumns: false
5858
}
5959
}

0 commit comments

Comments
 (0)