Skip to content

Commit 1d2842e

Browse files
authored
Changes for new variable types (#358)
* changes for new variable types when reading options from a JASP file
* start on fixing unit tests
* fix some more tests
1 parent 4fd6ed6 commit 1d2842e

25 files changed: 127 additions (+127) and 53 deletions (-53)

R/commonMachineLearningClustering.R

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
predictors <- unlist(options[["predictors"]])
3535
predictors <- predictors[predictors != ""]
3636
if (is.null(dataset)) {
37-
dataset <- .readAndAddCompleteRowIndices(dataset, predictors)
37+
dataset <- .readAndAddCompleteRowIndices(options, "predictors")
3838
}
3939
if (options[["scaleVariables"]] && length(unlist(options[["predictors"]])) > 0) {
4040
dataset <- .scaleNumericData(dataset)
@@ -374,8 +374,8 @@
374374
ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(ncolors)) +
375375
jaspGraphs::geom_rangeframe() +
376376
jaspGraphs::themeJaspRaw(legend.position = if (options[["tsneClusterPlotLegend"]]) "right" else "none") +
377-
ggplot2::theme(axis.ticks = ggplot2::element_blank(),
378-
axis.text.x = ggplot2::element_blank(),
377+
ggplot2::theme(axis.ticks = ggplot2::element_blank(),
378+
axis.text.x = ggplot2::element_blank(),
379379
axis.text.y = ggplot2::element_blank())
380380
if (options[["tsneClusterPlotLabels"]]) {
381381
p <- p + ggrepel::geom_text_repel(ggplot2::aes(label = rownames(dataset), x = x, y = y), hjust = -1, vjust = 1, data = plotData, seed = 1)

R/commonMachineLearningRegression.R

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,23 +55,23 @@
5555
}
5656

5757
.readDataClassificationRegressionAnalyses <- function(dataset, options) {
58-
target <- NULL
59-
if (options[["target"]] != "") {
60-
target <- options[["target"]]
61-
}
62-
predictors <- NULL
63-
if (length(options[["predictors"]]) > 0) {
64-
predictors <- unlist(options[["predictors"]])
65-
}
58+
6659
testSetIndicator <- NULL
67-
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") {
68-
testSetIndicator <- options[["testSetIndicatorVariable"]]
69-
}
70-
return(.readAndAddCompleteRowIndices(dataset, columns = c(target, predictors), columnsAsNumeric = testSetIndicator))
60+
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator")
61+
testSetIndicator <- "testSetIndicatorVariable"
62+
63+
return(.readAndAddCompleteRowIndices(options, c("target", "predictors"), testSetIndicator))
7164
}
7265

73-
.readAndAddCompleteRowIndices <- function(dataset, columns = NULL, columnsAsNumeric = NULL) {
74-
dataset <- .readDataSetToEnd(columns = columns, columns.as.numeric = columnsAsNumeric)
66+
.readAndAddCompleteRowIndices <- function(options, optionNames = NULL, optionNamesAsNumeric = NULL) {
67+
68+
if (!is.null(optionNamesAsNumeric))
69+
for (name in optionNamesAsNumeric) {
70+
name2 <- paste(name, ".types")
71+
if (is.null(options[[name]]))
72+
options[[name2]] <- rep("scale", length(options[[name]]))
73+
}
74+
dataset <- jaspBase::readDataSetByVariableTypes(options, c(optionNames, optionNamesAsNumeric))
7575
complete.index <- which(complete.cases(dataset))
7676
dataset <- na.omit(dataset)
7777
rownames(dataset) <- as.character(complete.index)

R/mlRegressionRegularized.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,13 @@ mlRegressionRegularized <- function(jaspResults, dataset, options, ...) {
6969
if (options[["weights"]] != "") {
7070
weights <- options[["weights"]]
7171
}
72-
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") {
73-
testSetIndicator <- options[["testSetIndicatorVariable"]]
74-
}
72+
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator")
73+
testSetIndicator <- "testSetIndicatorVariable"
74+
7575
predictors <- unlist(options["predictors"])
7676
predictors <- predictors[predictors != ""]
7777
if (is.null(dataset)) {
78-
dataset <- .readAndAddCompleteRowIndices(dataset, columns = predictors, columnsAsNumeric = c(target, weights, testSetIndicator))
78+
dataset <- .readAndAddCompleteRowIndices(options, c("target", "predictors", "weights"), testSetIndicator)
7979
}
8080
if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) {
8181
dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE])

tests/testthat/test-mlclassificationboosting.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@ options$modelOptimization <- "manual"
77
options$modelValid <- "validationManual"
88
options$predictionsColumn <- ""
99
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
10+
options$predictors.types <- rep("scale", 4)
1011
options$savePath <- ""
1112
options$setSeed <- TRUE
1213
options$target <- "Species"
14+
options$target.types <- "nominal"
1315
options$testIndicatorColumn <- ""
1416
options$testSetIndicatorVariable <- ""
1517
options$dataSplitPlot <- FALSE
@@ -38,12 +40,14 @@ options$noOfFolds <- 5
3840
options$deviancePlot <- TRUE
3941
options$outOfBagImprovementPlot <- TRUE
4042
options$relativeInfluencePlot <- TRUE
41-
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
42-
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
43+
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
44+
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
4345
"Hue", "Dilution", "Proline")
46+
options$predictors.types <- rep("scale", length(options$predictors))
4447
options$rocCurve <- TRUE
4548
options$setSeed <- TRUE
4649
options$target <- "Type"
50+
options$target.types <- "nominal"
4751
options$testDataManual <- 0.2
4852
options$testIndicatorColumn <- ""
4953
options$testSetIndicatorVariable <- ""

tests/testthat/test-mlclassificationdecisiontree.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@ options$modelOptimization <- "manual"
77
options$modelValid <- "validationManual"
88
options$predictionsColumn <- ""
99
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
10+
options$predictors.types <- rep("scale", 4)
1011
options$savePath <- ""
1112
options$setSeed <- TRUE
1213
options$target <- "Species"
14+
options$target.types <- "nominal"
1315
options$testIndicatorColumn <- ""
1416
options$testSetIndicatorVariable <- ""
1517
options$dataSplitPlot <- FALSE
@@ -33,11 +35,13 @@ options$noOfFolds <- 5
3335
options$decisionTreePlot <- TRUE
3436
options$predictionsColumn <- ""
3537
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
38+
options$predictors.types <- rep("scale", 4)
3639
options$saveModel <- FALSE
3740
options$savePath <- ""
3841
options$setSeed <- TRUE
3942
options$featureImportanceTable <- TRUE
4043
options$target <- "Species"
44+
options$target.types <- "nominal"
4145
options$testDataManual <- 0.2
4246
options$testIndicatorColumn <- ""
4347
options$testSetIndicatorVariable <- ""

tests/testthat/test-mlclassificationknn.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@ options$modelOptimization <- "manual"
77
options$modelValid <- "validationManual"
88
options$predictionsColumn <- ""
99
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
10+
options$predictors.types <- rep("scale", 4)
1011
options$savePath <- ""
1112
options$setSeed <- TRUE
1213
options$target <- "Species"
14+
options$target.types <- "nominal"
1315
options$testIndicatorColumn <- ""
1416
options$testSetIndicatorVariable <- ""
1517
options$dataSplitPlot <- FALSE
@@ -36,12 +38,14 @@ options$modelValid <- "validationManual"
3638
options$noOfFolds <- 5
3739
options$errorVsKPlot <- TRUE
3840
options$weightsPlot <- TRUE
39-
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
40-
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
41+
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
42+
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
4143
"Hue", "Dilution", "Proline")
44+
options$predictors.types <- rep("scale", length(options$predictors))
4245
options$rocCurve <- TRUE
4346
options$setSeed <- TRUE
4447
options$target <- "Type"
48+
options$target.types <- "nominal"
4549
options$testDataManual <- 0.2
4650
options$testIndicatorColumn <- ""
4751
options$testSetIndicatorVariable <- ""

tests/testthat/test-mlclassificationlda.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,15 @@ options$modelOptimization <- "manual"
1919
options$modelValid <- "validationManual"
2020
options$multicolTable <- TRUE
2121
options$noOfFolds <- 5
22-
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
23-
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
22+
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
23+
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
2424
"Hue", "Dilution", "Proline")
25+
options$predictors.types <- rep("scale", length(options$predictors))
2526
options$priorTable <- TRUE
2627
options$rocCurve <- TRUE
2728
options$setSeed <- TRUE
2829
options$target <- "Type"
30+
options$target.types <- "nominal"
2931
options$testDataManual <- 0.2
3032
options$testIndicatorColumn <- ""
3133
options$testSetIndicatorVariable <- ""

tests/testthat/test-mlclassificationnaivebayes.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,13 @@ options$modelValid <- "validationManual"
1111
options$noOfFolds <- 5
1212
options$predictionsColumn <- ""
1313
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
14+
options$predictors.types <- rep("scale", 4)
1415
options$saveModel <- FALSE
1516
options$savePath <- ""
1617
options$setSeed <- TRUE
1718
options$supportVectorsTable <- TRUE
1819
options$target <- "Species"
20+
options$target.types <- "nominal"
1921
options$testDataManual <- 0.2
2022
options$testIndicatorColumn <- ""
2123
options$testSetIndicatorVariable <- ""

tests/testthat/test-mlclassificationneuralnetwork.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@ options$modelOptimization <- "manual"
1414
options$modelValid <- "validationManual"
1515
options$predictionsColumn <- ""
1616
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
17+
options$predictors.types <- rep("scale", 4)
1718
options$saveModel <- FALSE
1819
options$savePath <- ""
1920
options$setSeed <- TRUE
2021
options$target <- "Species"
22+
options$target.types <- "nominal"
2123
options$testDataManual <- 0.2
2224
options$testIndicatorColumn <- ""
2325
options$testSetIndicatorVariable <- ""

tests/testthat/test-mlclassificationrandomforest.R

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@ options$modelOptimization <- "manual"
77
options$modelValid <- "validationManual"
88
options$predictionsColumn <- ""
99
options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
10+
options$predictors.types <- rep("scale", 4)
1011
options$savePath <- ""
1112
options$setSeed <- TRUE
1213
options$target <- "Species"
14+
options$target.types <- "nominal"
1315
options$testIndicatorColumn <- ""
1416
options$testSetIndicatorVariable <- ""
1517
options$dataSplitPlot <- FALSE
@@ -37,13 +39,15 @@ options$noOfFolds <- 5
3739
options$accuracyDecreasePlot <- TRUE
3840
options$purityIncreasePlot <- TRUE
3941
options$treesVsModelErrorPlot <- TRUE
40-
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
41-
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
42+
options$predictors <- list("Alcohol", "Malic", "Ash", "Alcalinity", "Magnesium", "Phenols",
43+
"Flavanoids", "Nonflavanoids", "Proanthocyanins", "Color",
4244
"Hue", "Dilution", "Proline")
45+
options$predictors.types <- rep("scale", length(options$predictors))
4346
options$rocCurve <- TRUE
4447
options$setSeed <- TRUE
4548
options$featureImportanceTable <- TRUE
4649
options$target <- "Type"
50+
options$target.types <- "scale"
4751
options$testDataManual <- 0.2
4852
options$testIndicatorColumn <- ""
4953
options$testSetIndicatorVariable <- ""

0 commit comments

Comments
 (0)