From 4d5fd7375bd3b68e1ccf582041abe523933d7afe Mon Sep 17 00:00:00 2001
From: Koen Derks <koen-derks@hotmail.com>
Date: Wed, 26 Mar 2025 23:47:11 +0100
Subject: [PATCH 1/4] Class probabilities in prediction analysis

---
 R/mlPrediction.R                     | 112 +++++++++++++++++++--------
 inst/qml/common/ui/ExportResults.qml |  10 +++
 inst/qml/mlPrediction.qml            |   1 +
 3 files changed, 90 insertions(+), 33 deletions(-)

diff --git a/R/mlPrediction.R b/R/mlPrediction.R
index 5128061f..ea3f0008 100644
--- a/R/mlPrediction.R
+++ b/R/mlPrediction.R
@@ -88,74 +88,103 @@ is.jaspMachineLearning <- function(x) {
 }
 .mlPredictionGetPredictions.kknn <- function(model, dataset) {
   if (inherits(model, "jaspClassification")) {
-    as.character(kknn:::predict.train.kknn(model[["predictive"]], dataset))
+    hard <- as.character(kknn:::predict.train.kknn(model[["predictive"]], dataset))
+    soft <- kknn:::predict.train.kknn(model[["predictive"]], dataset, type = "prob")
+    return(list(hard, soft))
   } else if (inherits(model, "jaspRegression")) {
-    as.numeric(kknn:::predict.train.kknn(model[["predictive"]], dataset))
+    hard <- as.numeric(kknn:::predict.train.kknn(model[["predictive"]], dataset))
+    return(list(hard))
   }
 }
 .mlPredictionGetPredictions.lda <- function(model, dataset) {
-  as.character(MASS:::predict.lda(model, newdata = dataset)$class)
+  hard <- as.character(MASS:::predict.lda(model, newdata = dataset)$class)
+  soft <- MASS:::predict.lda(model, newdata = dataset)$posterior
+  return(list(hard, soft))
 }
 .mlPredictionGetPredictions.lm <- function(model, dataset) {
-  as.numeric(predict(model, newdata = dataset))
+  hard <- as.numeric(predict(model, newdata = dataset))
+  return(list(hard))
 }
 .mlPredictionGetPredictions.gbm <- function(model, dataset) {
   if (inherits(model, "jaspClassification")) {
-    tmp <- gbm:::predict.gbm(model, newdata = dataset, n.trees = model[["n.trees"]], type = "response")
-    as.character(colnames(tmp)[apply(tmp, 1, which.max)])
+    soft <- gbm:::predict.gbm(model, newdata = dataset, n.trees = model[["n.trees"]], type = "response")
+    hard <- as.character(colnames(soft)[apply(soft, 1, which.max)])
+    return(list(hard, soft[, , 1]))
   } else if (inherits(model, "jaspRegression")) {
-    as.numeric(gbm:::predict.gbm(model, newdata = dataset, n.trees = model[["n.trees"]], type = "response"))
+    hard <- as.numeric(gbm:::predict.gbm(model, newdata = dataset, n.trees = model[["n.trees"]], type = "response"))
+    return(list(hard))
   }
 }
 .mlPredictionGetPredictions.randomForest <- function(model, dataset) {
   if (inherits(model, "jaspClassification")) {
-    as.character(randomForest:::predict.randomForest(model, newdata = dataset))
+    hard <- as.character(randomForest:::predict.randomForest(model, newdata = dataset))
+    soft <- predict(model, newdata = dataset, type = "prob")
+    return(list(hard, soft))
   } else if (inherits(model, "jaspRegression")) {
-    as.numeric(randomForest:::predict.randomForest(model, newdata = dataset))
+    hard <- as.numeric(randomForest:::predict.randomForest(model, newdata = dataset))
+    return(list(hard))
   }
 }
 .mlPredictionGetPredictions.cv.glmnet <- function(model, dataset) {
-  as.numeric(glmnet:::predict.cv.glmnet(model, newx = data.matrix(dataset)))
+  hard <- as.numeric(glmnet:::predict.cv.glmnet(model, newx = data.matrix(dataset)))
+  return(list(hard))
 }
 .mlPredictionGetPredictions.nn <- function(model, dataset) {
   if (inherits(model, "jaspClassification")) {
-    as.character(levels(factor(model[["data"]][[model[["jaspVars"]][["encoded"]]$target]]))[max.col(neuralnet:::predict.nn(model, newdata = dataset))])
+    soft <- neuralnet:::predict.nn(model, newdata = dataset)
+    colnames(soft) <- levels(factor(model[["data"]][[model[["jaspVars"]][["encoded"]]$target]]))
+    hard <- colnames(soft)[apply(soft, 1, which.max)]
+    return(list(hard, soft))
   } else if (inherits(model, "jaspRegression")) {
-    as.numeric(neuralnet:::predict.nn(model, newdata = dataset))
+    hard <- as.numeric(neuralnet:::predict.nn(model, newdata = dataset))
+    return(list(hard))
   }
 }
 .mlPredictionGetPredictions.rpart <- function(model, dataset) {
   if (inherits(model, "jaspClassification")) {
-    as.character(levels(factor(model[["data"]][[model[["jaspVars"]][["encoded"]]$target]]))[max.col(rpart:::predict.rpart(model, newdata = dataset))])
+    soft <- rpart:::predict.rpart(model, newdata = dataset)
+    colnames(soft) <- levels(factor(model[["data"]][[model[["jaspVars"]][["encoded"]]$target]]))
+    hard <- colnames(soft)[apply(soft, 1, which.max)]
+    return(list(hard, soft))
   } else if (inherits(model, "jaspRegression")) {
-    as.numeric(rpart:::predict.rpart(model, newdata = dataset))
+    hard <- as.numeric(rpart:::predict.rpart(model, newdata = dataset))
+    return(list(hard))
   }
 }
 .mlPredictionGetPredictions.svm <- function(model, dataset) {
   if (inherits(model, "jaspClassification")) {
-    as.character(levels(factor(model[["data"]][[model[["jaspVars"]][["encoded"]]$target]]))[e1071:::predict.svm(model, newdata = dataset)])
+    soft <- attr(e1071:::predict.svm(model, newdata = dataset, probability = TRUE), "probabilities")
+    hard <- as.character(e1071:::predict.svm(model, newdata = dataset))
+    return(list(hard, soft))
   } else if (inherits(model, "jaspRegression")) {
-    as.numeric(e1071:::predict.svm(model, newdata = dataset))
+    hard <- as.numeric(e1071:::predict.svm(model, newdata = dataset))
+    return(list(hard))
   }
 }
 .mlPredictionGetPredictions.naiveBayes <- function(model, dataset) {
-  as.character(e1071:::predict.naiveBayes(model, newdata = dataset, type = "class"))
+  soft <- e1071:::predict.naiveBayes(model, newdata = dataset, type = "raw")
+  hard <- as.character(e1071:::predict.naiveBayes(model, newdata = dataset, type = "class"))
+  return(list(hard, soft))
 }
 .mlPredictionGetPredictions.glm <- function(model, dataset) {
-  as.character(levels(as.factor(model$model[[model[["jaspVars"]][["encoded"]]$target]]))[round(predict(model, newdata = dataset, type = "response"), 0) + 1])
+  probs <- predict(model, newdata = dataset, type = "response")
+  soft <- matrix(c(1 - probs, probs), ncol = 2)
+  colnames(soft) <- levels(as.factor(model$model[[model[["jaspVars"]][["encoded"]]$target]]))
+  hard <- colnames(soft)[apply(soft, 1, which.max)]
+  return(list(hard, soft))
 }
 .mlPredictionGetPredictions.vglm <- function(model, dataset) {
   logodds <- predict(model[["original"]], newdata = dataset)
   ncategories <- ncol(logodds) + 1
-  probabilities <- matrix(0, nrow = nrow(logodds), ncol = ncategories)
+  soft <- matrix(0, nrow = nrow(logodds), ncol = ncategories)
   for (i in seq_len(ncategories - 1)) {
-    probabilities[, i] <- exp(logodds[, i])
+    soft[, i] <- exp(logodds[, i])
   }
-  probabilities[, ncategories] <- 1
-  row_sums <- rowSums(probabilities)
-  probabilities <- probabilities / row_sums
-  predicted_columns <- apply(probabilities, 1, which.max)
-  as.character(levels(as.factor(model$target))[predicted_columns])
+  soft[, ncategories] <- 1
+  soft <- soft / rowSums(soft)
+  colnames(soft) <- as.character(levels(as.factor(model$target)))
+  hard <- colnames(soft)[apply(soft, 1, which.max)]
+  return(list(hard, soft))
 }
 
 # S3 method to make find out number of observations in training data
@@ -372,7 +401,7 @@ is.jaspMachineLearning <- function(x) {
   if (!ready) {
     return()
   }
-  predictions <- .mlPredictionsState(model, dataset, options, jaspResults, ready)
+  predictions <- .mlPredictionsState(model, dataset, options, jaspResults, ready)[[1]]
   indexes <- options[["fromIndex"]]:options[["toIndex"]]
   selection <- predictions[indexes]
   cols <- list(row = indexes, pred = selection)
@@ -397,12 +426,29 @@ is.jaspMachineLearning <- function(x) {
 }
 
 .mlPredictionsAddPredictions <- function(model, dataset, options, jaspResults, ready) {
-  if (options[["addPredictions"]] && is.null(jaspResults[["predictionsColumn"]]) && options[["predictionsColumn"]] != "" && ready) {
-    predictionsColumn <- rep(NA, max(as.numeric(rownames(dataset))))
-    predictionsColumn[as.numeric(rownames(dataset))] <- .mlPredictionsState(model, dataset, options, jaspResults, ready)
-    jaspResults[["predictionsColumn"]] <- createJaspColumn(columnName = options[["predictionsColumn"]])
-    jaspResults[["predictionsColumn"]]$dependOn(options = c("predictionsColumn", "predictors", "trainedModelFilePath", "scaleVariables", "addPredictions"))
-    if (inherits(model, "jaspClassification")) jaspResults[["predictionsColumn"]]$setNominal(predictionsColumn)
-    if (inherits(model, "jaspRegression")) jaspResults[["predictionsColumn"]]$setScale(predictionsColumn)
+  if (options[["addPredictions"]] && options[["predictionsColumn"]] != "" && ready) {
+    predictions <- .mlPredictionsState(model, dataset, options, jaspResults, ready)
+    # Add hard predictions for regression and classification
+    if (is.null(jaspResults[["predictionsColumn"]])) {
+      predictionsColumn <- rep(NA, max(as.numeric(rownames(dataset))))
+      predictionsColumn[as.numeric(rownames(dataset))] <- predictions[[1]]
+      jaspResults[["predictionsColumn"]] <- createJaspColumn(columnName = options[["predictionsColumn"]])
+      jaspResults[["predictionsColumn"]]$dependOn(options = c("predictionsColumn", "predictors", "trainedModelFilePath", "scaleVariables", "addPredictions"))
+      if (inherits(model, "jaspClassification")) jaspResults[["predictionsColumn"]]$setNominal(predictionsColumn)
+      if (inherits(model, "jaspRegression")) jaspResults[["predictionsColumn"]]$setScale(predictionsColumn)
+    }
+    # Add predicted probabilities for classification only
+    if (inherits(model, "jaspClassification") && options[["addProbabilities"]]) {
+      classNames <- colnames(predictions[[2]])
+      for (i in seq_along(classNames)) {
+        colName <- paste0(decodeColNames(options[["predictionsColumn"]]), "_", classNames[i])
+        if (!is.null(jaspResults[[colName]])) {
+          break
+        }
+        jaspResults[[colName]] <- createJaspColumn(columnName = colName)
+        jaspResults[[colName]]$dependOn(options = c("predictionsColumn", "predictors", "trainedModelFilePath", "scaleVariables", "addPredictions", "addProbabilities"))
+        jaspResults[[colName]]$setScale(predictions[[2]][, i])
+      }
+    }
   }
 }
diff --git a/inst/qml/common/ui/ExportResults.qml b/inst/qml/common/ui/ExportResults.qml
index c7401e84..43a2158c 100644
--- a/inst/qml/common/ui/ExportResults.qml
+++ b/inst/qml/common/ui/ExportResults.qml
@@ -24,6 +24,7 @@ Group
 {
 	property alias enabled:			exportSection.enabled
 	property alias showSave:		saveGroup.visible
+	property bool showProbs:		false
 
 	id:								exportSection
 	title:							qsTr("Export Results")
@@ -45,6 +46,15 @@ Group
 			enabled:				addPredictions.checked
 			info:					qsTr("The column name for the predicted values.")
 		}
+
+		CheckBox
+		{
+			id:						probabilities
+			name:					"addProbabilities"
+			text:					qsTr("Add probabilities (classification only)")
+			visible:				showProbs
+			info:					qsTr("In classification analyses, also add the predicted probabilities for each class to the data.")
+		}
 	}
 
 	Group
diff --git a/inst/qml/mlPrediction.qml b/inst/qml/mlPrediction.qml
index c0c5cb64..536b7c04 100644
--- a/inst/qml/mlPrediction.qml
+++ b/inst/qml/mlPrediction.qml
@@ -116,5 +116,6 @@ Form
 	UI.ExportResults {
 		enabled:								predictors.count > 1
 		showSave:								false
+		showProbs:								true
 	}
 }

From f3af3a07a0032c748dec3178432b7c2bcc533139 Mon Sep 17 00:00:00 2001
From: Koen Derks <koen-derks@hotmail.com>
Date: Thu, 27 Mar 2025 08:15:26 +0100
Subject: [PATCH 2/4] More explicit tie breaking

---
 R/mlPrediction.R | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/R/mlPrediction.R b/R/mlPrediction.R
index ea3f0008..d32786a2 100644
--- a/R/mlPrediction.R
+++ b/R/mlPrediction.R
@@ -88,8 +88,8 @@ is.jaspMachineLearning <- function(x) {
 }
 .mlPredictionGetPredictions.kknn <- function(model, dataset) {
   if (inherits(model, "jaspClassification")) {
-    hard <- as.character(kknn:::predict.train.kknn(model[["predictive"]], dataset))
     soft <- kknn:::predict.train.kknn(model[["predictive"]], dataset, type = "prob")
+    hard <- colnames(soft)[max.col(soft, ties.method = "random")]
     return(list(hard, soft))
   } else if (inherits(model, "jaspRegression")) {
     hard <- as.numeric(kknn:::predict.train.kknn(model[["predictive"]], dataset))
@@ -97,8 +97,8 @@ is.jaspMachineLearning <- function(x) {
   }
 }
 .mlPredictionGetPredictions.lda <- function(model, dataset) {
-  hard <- as.character(MASS:::predict.lda(model, newdata = dataset)$class)
   soft <- MASS:::predict.lda(model, newdata = dataset)$posterior
+  hard <- colnames(soft)[max.col(soft, ties.method = "random")]
   return(list(hard, soft))
 }
 .mlPredictionGetPredictions.lm <- function(model, dataset) {
@@ -107,9 +107,9 @@ is.jaspMachineLearning <- function(x) {
 }
 .mlPredictionGetPredictions.gbm <- function(model, dataset) {
   if (inherits(model, "jaspClassification")) {
-    soft <- gbm:::predict.gbm(model, newdata = dataset, n.trees = model[["n.trees"]], type = "response")
-    hard <- as.character(colnames(soft)[apply(soft, 1, which.max)])
-    return(list(hard, soft[, , 1]))
+    soft <- gbm:::predict.gbm(model, newdata = dataset, n.trees = model[["n.trees"]], type = "response")[, , 1]
+    hard <- colnames(soft)[max.col(soft, ties.method = "random")]
+    return(list(hard, soft))
   } else if (inherits(model, "jaspRegression")) {
     hard <- as.numeric(gbm:::predict.gbm(model, newdata = dataset, n.trees = model[["n.trees"]], type = "response"))
     return(list(hard))
@@ -117,8 +117,8 @@ is.jaspMachineLearning <- function(x) {
 }
 .mlPredictionGetPredictions.randomForest <- function(model, dataset) {
   if (inherits(model, "jaspClassification")) {
-    hard <- as.character(randomForest:::predict.randomForest(model, newdata = dataset))
     soft <- predict(model, newdata = dataset, type = "prob")
+    hard <- colnames(soft)[max.col(soft, ties.method = "random")]
     return(list(hard, soft))
   } else if (inherits(model, "jaspRegression")) {
     hard <- as.numeric(randomForest:::predict.randomForest(model, newdata = dataset))
@@ -133,7 +133,7 @@ is.jaspMachineLearning <- function(x) {
   if (inherits(model, "jaspClassification")) {
     soft <- neuralnet:::predict.nn(model, newdata = dataset)
     colnames(soft) <- levels(factor(model[["data"]][[model[["jaspVars"]][["encoded"]]$target]]))
-    hard <- colnames(soft)[apply(soft, 1, which.max)]
+    hard <- colnames(soft)[max.col(soft, ties.method = "random")]
     return(list(hard, soft))
   } else if (inherits(model, "jaspRegression")) {
     hard <- as.numeric(neuralnet:::predict.nn(model, newdata = dataset))
@@ -144,7 +144,7 @@ is.jaspMachineLearning <- function(x) {
   if (inherits(model, "jaspClassification")) {
     soft <- rpart:::predict.rpart(model, newdata = dataset)
     colnames(soft) <- levels(factor(model[["data"]][[model[["jaspVars"]][["encoded"]]$target]]))
-    hard <- colnames(soft)[apply(soft, 1, which.max)]
+    hard <- colnames(soft)[max.col(soft, ties.method = "random")]
     return(list(hard, soft))
   } else if (inherits(model, "jaspRegression")) {
     hard <- as.numeric(rpart:::predict.rpart(model, newdata = dataset))
@@ -154,7 +154,7 @@ is.jaspMachineLearning <- function(x) {
 .mlPredictionGetPredictions.svm <- function(model, dataset) {
   if (inherits(model, "jaspClassification")) {
     soft <- attr(e1071:::predict.svm(model, newdata = dataset, probability = TRUE), "probabilities")
-    hard <- as.character(e1071:::predict.svm(model, newdata = dataset))
+    hard <- colnames(soft)[max.col(soft, ties.method = "random")]
     return(list(hard, soft))
   } else if (inherits(model, "jaspRegression")) {
     hard <- as.numeric(e1071:::predict.svm(model, newdata = dataset))
@@ -163,14 +163,14 @@ is.jaspMachineLearning <- function(x) {
 }
 .mlPredictionGetPredictions.naiveBayes <- function(model, dataset) {
   soft <- e1071:::predict.naiveBayes(model, newdata = dataset, type = "raw")
-  hard <- as.character(e1071:::predict.naiveBayes(model, newdata = dataset, type = "class"))
+  hard <- colnames(soft)[max.col(soft, ties.method = "random")]
   return(list(hard, soft))
 }
 .mlPredictionGetPredictions.glm <- function(model, dataset) {
   probs <- predict(model, newdata = dataset, type = "response")
   soft <- matrix(c(1 - probs, probs), ncol = 2)
   colnames(soft) <- levels(as.factor(model$model[[model[["jaspVars"]][["encoded"]]$target]]))
-  hard <- colnames(soft)[apply(soft, 1, which.max)]
+  hard <- colnames(soft)[max.col(soft, ties.method = "random")]
   return(list(hard, soft))
 }
 .mlPredictionGetPredictions.vglm <- function(model, dataset) {
@@ -183,7 +183,7 @@ is.jaspMachineLearning <- function(x) {
   soft[, ncategories] <- 1
   soft <- soft / rowSums(soft)
   colnames(soft) <- as.character(levels(as.factor(model$target)))
-  hard <- colnames(soft)[apply(soft, 1, which.max)]
+  hard <- colnames(soft)[max.col(soft, ties.method = "random")]
   return(list(hard, soft))
 }
 

From 2346fe1972ced416587d504c0edde64255b90153 Mon Sep 17 00:00:00 2001
From: Koen Derks <koen-derks@hotmail.com>
Date: Thu, 27 Mar 2025 08:30:28 +0100
Subject: [PATCH 3/4] Make sure shap table works

---
 R/commonMachineLearningRegression.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/commonMachineLearningRegression.R b/R/commonMachineLearningRegression.R
index 8e990f44..90013c24 100644
--- a/R/commonMachineLearningRegression.R
+++ b/R/commonMachineLearningRegression.R
@@ -786,7 +786,7 @@
   } else {
     explainer <- model[["explainer"]]
     x_test <- dataset[, predictors]
-    predictions <- .mlPredictionsState(model, dataset, options, jaspResults, ready)[options[["fromIndex"]]:options[["toIndex"]]]
+    predictions <- .mlPredictionsState(model, dataset, options, jaspResults, ready)[[1]][options[["fromIndex"]]:options[["toIndex"]]]
   }
   from <- min(c(options[["fromIndex"]], options[["toIndex"]] - 1, nrow(x_test)))
   to <- min(c(options[["toIndex"]], nrow(x_test)))

From 6e5752881c10790629e90150416e5869b0836d47 Mon Sep 17 00:00:00 2001
From: Koen Derks <koen-derks@hotmail.com>
Date: Thu, 27 Mar 2025 08:33:57 +0100
Subject: [PATCH 4/4] Update info

---
 inst/qml/common/ui/ExportResults.qml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/qml/common/ui/ExportResults.qml b/inst/qml/common/ui/ExportResults.qml
index 43a2158c..13389d47 100644
--- a/inst/qml/common/ui/ExportResults.qml
+++ b/inst/qml/common/ui/ExportResults.qml
@@ -53,7 +53,7 @@ Group
 			name:					"addProbabilities"
 			text:					qsTr("Add probabilities (classification only)")
 			visible:				showProbs
-			info:					qsTr("In classification analyses, also add the predicted probabilities for each class to the data.")
+			info:					qsTr("In classification analyses, append the predicted probabilities for each class to the data. For neural networks, this option provides the output of the final layer.")
 		}
 	}