Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion R/commonMachineLearningClassification.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,47 @@
"noOfTrees", "maxTrees", "baggingFraction", "noOfPredictors", "numberOfPredictors", # Random forest
"complexityParameter", "degree", "gamma", "cost", "tolerance", "epsilon", "maxCost", # Support vector machine
"smoothingParameter", # Naive Bayes
"intercept", "link" # Logistic
"intercept", "link", # Logistic
"balanceLabels", "balanceSamplingMethod" # Common
)
if (includeSaveOptions) {
opt <- c(opt, "saveModel", "savePath")
}
return(opt)
}

# Balance the class sizes of the discrete target variable in a dataset.
#
# Every observed class of the target is randomly resampled to a common size:
#   - "minSample" (undersampling): each class is reduced to the size of the
#     smallest observed class, sampling without replacement.
#   - any other value (oversampling): each class is enlarged to the size of
#     the largest observed class, sampling with replacement.
#
# Arguments:
#   dataset  data frame that contains the target column.
#   options  list providing "balanceLabels" (logical), "balanceSamplingMethod"
#            (character) and "target" (name of the target column).
#
# Returns the dataset unchanged when balancing is switched off or when there
# is no observed class to balance; otherwise a data frame in which every
# observed class has the same number of rows.
.mlBalanceDataset <- function(dataset, options) {
  # Ensures that if the option is not selected, balancing will not occur.
  if (!isTRUE(options[["balanceLabels"]])) {
    return(dataset)
  }

  # `[[` always yields the column as a vector, also for tibble-like inputs.
  classes <- dataset[[options[["target"]]]]

  # drop = TRUE discards factor levels without observations. Otherwise an
  # unused level (e.g., a class that did not end up in the training subset)
  # yields an empty split, the minimum class size becomes 0 and undersampling
  # would remove every row.
  splitData <- split(dataset, classes, drop = TRUE)
  if (length(splitData) == 0L) {
    return(dataset)
  }

  classSizes <- vapply(splitData, nrow, integer(1))

  if (identical(options[["balanceSamplingMethod"]], "minSample")) {
    n <- min(classSizes)
    replace <- FALSE
  } else {
    n <- max(classSizes)
    replace <- TRUE
  }

  balancedSplits <- lapply(
    X = splitData,
    FUN = function(df) {
      # drop = FALSE keeps a data frame even when there is a single column.
      df[sample.int(nrow(df), size = n, replace = replace), , drop = FALSE]
    }
  )
  balancedDataset <- do.call(rbind, balancedSplits)

  return(balancedDataset)
}

.mlClassificationReadData <- function(dataset, options) {
dataset <- .readDataClassificationRegressionAnalyses(dataset, options, include_weights = FALSE)
if (options[["target"]] != "") {
Expand Down
4 changes: 2 additions & 2 deletions R/commonMachineLearningRegression.R
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@
if (length(factorsWithNewLevels) > 0) {
setType <- switch(type, "test" = gettext("test set"), "validation" = gettext("validation set"), "prediction" = gettext("new dataset"))
additionalMessage <- switch(type,
"test" = gettext(" or use a different test set (e.g., automatically by setting a different seed or manually by specifying the test set indicator)"),
"test" = gettext(" or use a different test set (e.g., automatically by setting a different seed or manually by specifying the test set indicator)"),
"validation" = gettext(" or use a different validation set by setting a different seed"),
"prediction" = "")
factorMessage <- paste(sapply(factorsWithNewLevels, function(i) {
Expand Down Expand Up @@ -597,7 +597,7 @@
}
plot <- createJaspPlot(plot = NULL, title = gettext("Data Split"), width = 800, height = 30)
plot$position <- position
plot$dependOn(options = c("dataSplitPlot", "target", "predictors", "trainingDataManual", "modelValid", "testSetIndicatorVariable", "testSetIndicator", "validationDataManual", "holdoutData", "testDataManual", "modelOptimization"))
plot$dependOn(options = c("balanceSamplingMethod", "balanceLabels", "dataSplitPlot", "target", "predictors", "trainingDataManual", "modelValid", "testSetIndicatorVariable", "testSetIndicator", "validationDataManual", "holdoutData", "testDataManual", "modelOptimization"))
jaspResults[["plotDataSplit"]] <- plot
if (!ready) {
return()
Expand Down
2 changes: 1 addition & 1 deletion R/mlClassificationLda.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ mlClassificationLda <- function(jaspResults, dataset, options, ...) {
# Sample a percentage of the total data set
trainingIndex <- sample.int(nrow(dataset), size = ceiling((1 - options[["testDataManual"]]) * nrow(dataset)))
}
trainingSet <- dataset[trainingIndex, ]
trainingSet <- .mlBalanceDataset(dataset[trainingIndex, ], options)
testSet <- dataset[-trainingIndex, ]
# Check for factor levels in the test set that are not in the training set
.checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
Expand Down
4 changes: 3 additions & 1 deletion R/mlClassificationLogisticMultinomial.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, .
# Sample a percentage of the total data set
trainingIndex <- sample.int(nrow(dataset), size = ceiling((1 - options[["testDataManual"]]) * nrow(dataset)))
}
trainingSet <- dataset[trainingIndex, ]
# Create training set with optional balanced classes
trainingSet <- .mlBalanceDataset(dataset[trainingIndex, ], options)

# Create the generated test set indicator
testIndicatorColumn <- rep(1, nrow(dataset))
testIndicatorColumn[trainingIndex] <- 0
Expand Down
2 changes: 1 addition & 1 deletion R/mlClassificationNaiveBayes.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ mlClassificationNaiveBayes <- function(jaspResults, dataset, options, ...) {
# Sample a percentage of the total data set
trainingIndex <- sample.int(nrow(dataset), size = ceiling((1 - options[["testDataManual"]]) * nrow(dataset)))
}
trainingSet <- dataset[trainingIndex, ]
trainingSet <- .mlBalanceDataset(dataset[trainingIndex, ], options)
# Create the generated test set indicator
testIndicatorColumn <- rep(1, nrow(dataset))
testIndicatorColumn[trainingIndex] <- 0
Expand Down
29 changes: 29 additions & 0 deletions inst/qml/common/ui/DataSplit.qml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Section
property alias leaveOneOutVisible: leaveOneOut.visible
property alias kFoldsVisible: kFolds.visible
property alias trainingValidationSplit: trainingValidationSplit.visible
property alias balanceTargetClasses: balanceTargetClasses.visible

title: qsTr("Data Split Preferences")

Expand Down Expand Up @@ -157,4 +158,32 @@ Section
info: qsTr("Partition the remaining data in *n* parts.")
}
}

// Optional class balancing of the target variable.
// The id is exposed through the `balanceTargetClasses` alias (bound to this
// control's `visible` property) so that individual analysis forms can hide it.
// The option name "balanceLabels" is the key the R code checks before balancing.
CheckBox
{
id: balanceTargetClasses
name: "balanceLabels"
label: qsTr("Balance sample size of target classes")
info: qsTr("When clicked, the dataset is balanced to have the same sample size for all classes of the target variable. This is done either through over- or undersampling")

// Sampling strategy; the selected value is passed as "balanceSamplingMethod".
RadioButtonGroup
{
name: "balanceSamplingMethod"

// Default choice: the R side compares the value against "minSample"
// to select undersampling without replacement.
RadioButton
{
value: "minSample"
label: qsTr("Undersample")
checked: true
info: qsTr("Balances the target classes by undersampling to match the size of the smallest class.")
}

// Any value other than "minSample" is handled as oversampling on the R side.
RadioButton
{
value: "maxSample"
label: qsTr("Oversample")
info: qsTr("Balances the target classes by oversampling to match the size of the largest class. This is done by sampling with replacement for smaller classes.")
}
}
}
}
7 changes: 6 additions & 1 deletion inst/qml/mlClassificationBoosting.qml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,12 @@ Form
}

UI.ExportResults { enabled: vars.predictorCount > 1 && vars.targetCount > 0 }
UI.DataSplit { leaveOneOutVisible: false; trainingValidationSplit: !optim.isManual }
UI.DataSplit
{
leaveOneOutVisible: false
trainingValidationSplit: !optim.isManual
balanceTargetClasses: false
}

Section
{
Expand Down
8 changes: 7 additions & 1 deletion inst/qml/mlClassificationDecisionTree.qml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,13 @@ Form
}

UI.ExportResults { enabled: vars.predictorCount > 0 && vars.targetCount > 0 }
UI.DataSplit { leaveOneOutVisible: false; kFoldsVisible: false; trainingValidationSplit: !optim.isManual }
UI.DataSplit
{
leaveOneOutVisible: false
kFoldsVisible: false
trainingValidationSplit: !optim.isManual
balanceTargetClasses: false
}

Section
{
Expand Down
2 changes: 1 addition & 1 deletion inst/qml/mlClassificationKnn.qml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Form
}

UI.ExportResults { enabled: vars.predictorCount > 0 && vars.targetCount > 0 }
UI.DataSplit { trainingValidationSplit: !optim.isManual }
UI.DataSplit { trainingValidationSplit: !optim.isManual; balanceTargetClasses: false }

Section
{
Expand Down
2 changes: 1 addition & 1 deletion inst/qml/mlClassificationNeuralNetwork.qml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ Form
}

UI.ExportResults { enabled: vars.predictorCount > 0 && vars.targetCount > 0 }
UI.DataSplit { leaveOneOutVisible: false; kFoldsVisible: false }
UI.DataSplit { leaveOneOutVisible: false; kFoldsVisible: false; balanceTargetClasses: false }

Section
{
Expand Down
8 changes: 7 additions & 1 deletion inst/qml/mlClassificationRandomForest.qml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,13 @@ Form
}

UI.ExportResults { enabled: vars.predictorCount > 1 && vars.targetCount > 0 }
UI.DataSplit { leaveOneOutVisible: false; kFoldsVisible: false; trainingValidationSplit: !optim.isManual }
UI.DataSplit
{
leaveOneOutVisible: false
kFoldsVisible: false
trainingValidationSplit: !optim.isManual
balanceTargetClasses: false
}

Section
{
Expand Down
7 changes: 6 additions & 1 deletion inst/qml/mlClassificationSvm.qml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,12 @@ Form
}

UI.ExportResults { enabled: vars.predictorCount > 0 && vars.targetCount > 0 }
UI.DataSplit { leaveOneOutVisible: false; kFoldsVisible: false; trainingValidationSplit: !optim.isManual }
// Data split preferences for this analysis; the class balancing option is hidden here.
UI.DataSplit
{
	leaveOneOutVisible: false
	kFoldsVisible: false
	trainingValidationSplit: !optim.isManual
	balanceTargetClasses: false
}

Section
{
Expand Down
9 changes: 7 additions & 2 deletions inst/qml/mlRegressionBoosting.qml
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,20 @@ Form
{
title: qsTr("Plots")

FIG.DataSplit { }
FIG.DataSplit {}
FIG.PredictivePerformance { }
BOOSTING.Oob { }
BOOSTING.Deviance { }
BOOSTING.RelativeInfluence { }
}

UI.ExportResults { enabled: vars.predictorCount > 1 && vars.targetCount > 0 }
UI.DataSplit { leaveOneOutVisible: false; trainingValidationSplit: !optim.isManual }
UI.DataSplit
{
leaveOneOutVisible: false
trainingValidationSplit: !optim.isManual
balanceTargetClasses: false
}

Section
{
Expand Down
8 changes: 7 additions & 1 deletion inst/qml/mlRegressionDecisionTree.qml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,13 @@ Form
}

UI.ExportResults { enabled: vars.predictorCount > 0 && vars.targetCount > 0 }
UI.DataSplit { leaveOneOutVisible: false; kFoldsVisible: false; trainingValidationSplit: !optim.isManual }
UI.DataSplit
{
leaveOneOutVisible: false
kFoldsVisible: false
trainingValidationSplit: !optim.isManual
balanceTargetClasses: false
}

Section
{
Expand Down
2 changes: 1 addition & 1 deletion inst/qml/mlRegressionKnn.qml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ Form

UI.ExportResults { enabled: vars.predictorCount > 0 && vars.targetCount > 0 }

UI.DataSplit { trainingValidationSplit: !optim.isManual }
UI.DataSplit { trainingValidationSplit: !optim.isManual ; balanceTargetClasses: false }

Section
{
Expand Down
2 changes: 1 addition & 1 deletion inst/qml/mlRegressionLinear.qml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ Form
}

UI.ExportResults { enabled: vars.predictorCount > 0 && vars.targetCount > 0 }
UI.DataSplit { trainingValidationSplit: false }
UI.DataSplit { trainingValidationSplit: false; balanceTargetClasses: false }

Section
{
Expand Down
2 changes: 1 addition & 1 deletion inst/qml/mlRegressionNeuralNetwork.qml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Form
}

UI.ExportResults { enabled: vars.predictorCount > 0 && vars.targetCount > 0 }
UI.DataSplit { leaveOneOutVisible: false; kFoldsVisible: false }
UI.DataSplit { leaveOneOutVisible: false; kFoldsVisible: false; balanceTargetClasses: false }

Section
{
Expand Down
8 changes: 7 additions & 1 deletion inst/qml/mlRegressionRandomForest.qml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,13 @@ Form
}

UI.ExportResults { enabled: vars.predictorCount > 1 && vars.targetCount > 0 }
UI.DataSplit { leaveOneOutVisible: false; kFoldsVisible: false; trainingValidationSplit: !optim.isManual }
UI.DataSplit
{
leaveOneOutVisible: false
kFoldsVisible: false
trainingValidationSplit: !optim.isManual
balanceTargetClasses: false
}

Section
{
Expand Down
1 change: 1 addition & 0 deletions inst/qml/mlRegressionRegularized.qml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ Form
leaveOneOutVisible: false
kFoldsVisible: false
trainingValidationSplit: !fixedModel.checked
balanceTargetClasses: false
}

Section
Expand Down
8 changes: 7 additions & 1 deletion inst/qml/mlRegressionSvm.qml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,13 @@ Form
}

UI.ExportResults { enabled: vars.predictorCount > 0 && vars.targetCount > 0 }
UI.DataSplit { leaveOneOutVisible: false; kFoldsVisible: false; trainingValidationSplit: !optim.isManual }
UI.DataSplit
{
leaveOneOutVisible: false
kFoldsVisible: false
trainingValidationSplit: !optim.isManual
balanceTargetClasses: false
}

Section
{
Expand Down