diff --git a/R/commonMachineLearningRegression.R b/R/commonMachineLearningRegression.R
index c3edee3a..2adc1282 100644
--- a/R/commonMachineLearningRegression.R
+++ b/R/commonMachineLearningRegression.R
@@ -72,10 +72,6 @@
   if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) {
     dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE])
   }
-  # Make sure the test set indicator is numeric
-  if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator")
-    dataset[[options[["testSetIndicatorVariable"]]]] <- as.numeric(dataset[[options[["testSetIndicatorVariable"]]]])
-
   return(dataset)
 }
@@ -145,6 +141,40 @@
   }
 }
+
+.checkForNewFactorLevelsInPredictionSet <- function(trainingSet, predictionSet, type, model = NULL) {
+  factorNames <- colnames(predictionSet)[sapply(predictionSet, is.factor)]
+  factorNames <- factorNames[which(factorNames %in% colnames(trainingSet))]
+  factorsWithNewLevels <- character()
+  missingLevelsInTrainingSet <- list()
+  for (i in seq_along(factorNames)) {
+    currentFactor <- factorNames[i]
+    factorLevelsInTrainingSet <- unique(trainingSet[[currentFactor]])
+    factorLevelsInPredictionSet <- unique(predictionSet[[currentFactor]])
+    missingLevelsIndex <- which(!(factorLevelsInPredictionSet %in% factorLevelsInTrainingSet))
+    if (length(missingLevelsIndex) > 0) {
+      if (type != "prediction") {
+        factorsWithNewLevels <- c(factorsWithNewLevels, currentFactor)
+        missingLevelsInTrainingSet[[currentFactor]] <- factorLevelsInPredictionSet[missingLevelsIndex]
+      } else {
+        currentFactor <- model[["jaspVars"]][["decoded"]]$predictors[which(model[["jaspVars"]][["encoded"]]$predictors == currentFactor)]
+        factorsWithNewLevels <- c(factorsWithNewLevels, currentFactor)
+        missingLevelsInTrainingSet[[currentFactor]] <- factorLevelsInPredictionSet[missingLevelsIndex]
+      }
+    }
+  }
+  if (length(factorsWithNewLevels) > 0) {
+    setType <- switch(type, "test" = gettext("test set"), "validation" = gettext("validation set"), "prediction" = gettext("new dataset"))
+    additionalMessage <- switch(type,
+      "test" = gettext(" or use a different test set (e.g., automatically by setting a different seed or manually by specifying the test set indicator)"),
+      "validation" = gettext(" or use a different validation set by setting a different seed"),
+      "prediction" = "")
+    factorMessage <- paste(sapply(factorsWithNewLevels, function(i) {
+      paste0("Factor: ", i, "; Levels: ", paste(missingLevelsInTrainingSet[[i]], collapse = ", "))
+    }), collapse = "\n")
+    jaspBase:::.quitAnalysis(gettextf("Some factors in the %1$s have levels that do not appear in the training set. Please remove the rows with the following levels from the dataset%2$s.\n\n%3$s", setType, additionalMessage, factorMessage))
+  }
+}
+
 .getCustomErrorChecksKnnBoosting <- function(dataset, options, type) {
   if (!type %in% c("knn", "boosting")) {
     return()
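For illustration, a minimal standalone sketch (hypothetical data, not part of the patch) of the situation the new helper aborts on: a factor level that occurs in a hold-out set but not in the training set. Inside JASP, .checkForNewFactorLevelsInPredictionSet() would stop the analysis with the message constructed above.

  # Hypothetical data: level "c" appears only in the test set, so a model fitted
  # on trainingSet has no encoding for that row; the helper stops the analysis.
  trainingSet <- data.frame(x = factor(c("a", "b", "a")), y = c(1, 2, 3))
  testSet <- data.frame(x = factor(c("a", "c")), y = c(4, 5))
  setdiff(unique(as.character(testSet$x)), unique(as.character(trainingSet$x))) # "c"
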
diff --git a/R/mlClassificationBoosting.R b/R/mlClassificationBoosting.R
index 1c37f90a..8edcfa69 100644
--- a/R/mlClassificationBoosting.R
+++ b/R/mlClassificationBoosting.R
@@ -98,6 +98,8 @@ mlClassificationBoosting <- function(jaspResults, dataset, options, ...) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     noOfFolds <- 0
     .mlBoostingCheckMinObsNode(options, trainingSet) # Check for min obs in nodes
     fit <- gbm::gbm(
@@ -113,6 +115,10 @@ mlClassificationBoosting <- function(jaspResults, dataset, options, ...) {
     testSet <- dataset[-trainingIndex, ]
     validationSet <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, validationSet, "validation")
     if (options[["modelValid"]] == "validationManual") {
       noOfFolds <- 0
     } else if (options[["modelValid"]] == "validationKFold") {
diff --git a/R/mlClassificationDecisionTree.R b/R/mlClassificationDecisionTree.R
index 3c3f31b9..cef12cf9 100644
--- a/R/mlClassificationDecisionTree.R
+++ b/R/mlClassificationDecisionTree.R
@@ -89,6 +89,8 @@ mlClassificationDecisionTree <- function(jaspResults, dataset, options, ...) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     complexityPenalty <- options[["complexityParameter"]]
     trainingFit <- rpart::rpart(
       formula = formula, data = trainingSet, method = "class", x = TRUE, y = TRUE,
@@ -101,6 +103,10 @@ mlClassificationDecisionTree <- function(jaspResults, dataset, options, ...) {
     testSet <- dataset[-trainingIndex, ]
     validationSet <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, validationSet, "validation")
     cps <- seq(0, options[["maxComplexityParameter"]], by = 0.01)
     accuracyStore <- trainAccuracyStore <- numeric(length(cps))
     startProgressbar(length(cps))
diff --git a/R/mlClassificationKnn.R b/R/mlClassificationKnn.R
index 8d6e0268..b3b4a3d4 100644
--- a/R/mlClassificationKnn.R
+++ b/R/mlClassificationKnn.R
@@ -89,6 +89,8 @@ mlClassificationKnn <- function(jaspResults, dataset, options, ...) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     testFit <- kknn::kknn(
       formula = formula, train = trainingSet, test = testSet, k = options[["noOfNearestNeighbours"]],
       distance = distance, kernel = weights, scale = FALSE
@@ -100,6 +102,10 @@ mlClassificationKnn <- function(jaspResults, dataset, options, ...) {
     testSet <- dataset[-trainingIndex, ]
     validationSet <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, validationSet, "validation")
     if (options[["modelValid"]] == "validationManual") {
       nnRange <- 1:options[["maxNearestNeighbors"]]
       accuracyStore <- numeric(length(nnRange))
diff --git a/R/mlClassificationLda.R b/R/mlClassificationLda.R
index 7a718e2a..5d1f27ca 100644
--- a/R/mlClassificationLda.R
+++ b/R/mlClassificationLda.R
@@ -112,6 +112,8 @@ mlClassificationLda <- function(jaspResults, dataset, options, ...) {
   }
   trainingSet <- dataset[trainingIndex, ]
   testSet <- dataset[-trainingIndex, ]
+  # Check for factor levels in the test set that are not in the training set
+  .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
   # Create the generated test set indicator
   testIndicatorColumn <- rep(1, nrow(dataset))
   testIndicatorColumn[trainingIndex] <- 0
diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R
index 465e7ed5..71f5af13 100644
--- a/R/mlClassificationLogisticMultinomial.R
+++ b/R/mlClassificationLogisticMultinomial.R
@@ -80,6 +80,8 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, ...) {
   testIndicatorColumn[trainingIndex] <- 0
   # Just create a train and a test set (no optimization)
   testSet <- dataset[-trainingIndex, ]
+  # Check for factor levels in the test set that are not in the training set
+  .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
   # Create the formula
   if (options[["intercept"]]) {
     formula <- formula(paste(options[["target"]], "~ 1 + ", paste(options[["predictors"]], collapse = " + ")))
diff --git a/R/mlClassificationNaiveBayes.R b/R/mlClassificationNaiveBayes.R
index 0ae613be..d2f7a9e0 100644
--- a/R/mlClassificationNaiveBayes.R
+++ b/R/mlClassificationNaiveBayes.R
@@ -81,6 +81,8 @@ mlClassificationNaiveBayes <- function(jaspResults, dataset, options, ...) {
   testIndicatorColumn[trainingIndex] <- 0
   # Just create a train and a test set (no optimization)
   testSet <- dataset[-trainingIndex, ]
+  # Check for factor levels in the test set that are not in the training set
+  .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
   trainingFit <- e1071::naiveBayes(formula, data = trainingSet, laplace = options[["smoothingParameter"]])
   # Use the specified model to make predictions for dataset
   testPredictions <- predict(trainingFit, newdata = testSet, type = "class")
diff --git a/R/mlClassificationNeuralNetwork.R b/R/mlClassificationNeuralNetwork.R
index 053e9765..e228b857 100644
--- a/R/mlClassificationNeuralNetwork.R
+++ b/R/mlClassificationNeuralNetwork.R
@@ -95,6 +95,8 @@ mlClassificationNeuralNetwork <- function(jaspResults, dataset, options, ...) {
   if (options[["modelOptimization"]] == "manual") {
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     structure <- .getNeuralNetworkStructure(options)
     p <- try({
       fit <- neuralnet::neuralnet(
@@ -120,6 +122,10 @@ mlClassificationNeuralNetwork <- function(jaspResults, dataset, options, ...) {
     testSet <- dataset[-trainingIndex, ]
     validationSet <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, validationSet, "validation")
     accuracyStore <- numeric(options[["maxGenerations"]])
     trainAccuracyStore <- numeric(options[["maxGenerations"]])
     # For plotting
diff --git a/R/mlClassificationRandomForest.R b/R/mlClassificationRandomForest.R
index 81d67800..a9f6848c 100644
--- a/R/mlClassificationRandomForest.R
+++ b/R/mlClassificationRandomForest.R
@@ -92,6 +92,8 @@ mlClassificationRandomForest <- function(jaspResults, dataset, options, ...) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     testFit <- randomForest::randomForest(
       x = trainingSet[, options[["predictors"]]], y = trainingSet[, options[["target"]]],
       xtest = testSet[, options[["predictors"]]], ytest = testSet[, options[["target"]]],
@@ -106,6 +108,10 @@ mlClassificationRandomForest <- function(jaspResults, dataset, options, ...) {
     testSet <- dataset[-trainingIndex, ]
     validationSet <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, validationSet, "validation")
     validationFit <- randomForest::randomForest(
       x = trainingSet[, options[["predictors"]]], y = trainingSet[, options[["target"]]],
       xtest = validationSet[, options[["predictors"]]], ytest = validationSet[, options[["target"]]],
diff --git a/R/mlClassificationSvm.R b/R/mlClassificationSvm.R
index dae6a85d..e576d85a 100644
--- a/R/mlClassificationSvm.R
+++ b/R/mlClassificationSvm.R
@@ -86,6 +86,8 @@ mlClassificationSvm <- function(jaspResults, dataset, options, ...) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     cost <- options[["cost"]]
     trainingFit <- e1071::svm(
       formula = formula, data = trainingSet, type = "C-classification", kernel = options[["weights"]], cost = cost, tolerance = options[["tolerance"]],
@@ -97,6 +99,10 @@ mlClassificationSvm <- function(jaspResults, dataset, options, ...) {
     testSet <- dataset[-trainingIndex, ]
     validationSet <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, validationSet, "validation")
     costs <- seq(0.01, options[["maxCost"]], 0.01)
     accuracyStore <- trainAccuracyStore <- numeric(length(costs))
     startProgressbar(length(costs))
diff --git a/R/mlPrediction.R b/R/mlPrediction.R
index 6ca7d1a5..dfae59ac 100644
--- a/R/mlPrediction.R
+++ b/R/mlPrediction.R
@@ -221,7 +221,7 @@ is.jaspMachineLearning <- function(x) {

 # also define methods for other objects
 .mlPredictionReady <- function(model, dataset, options) {
-  if (!is.null(model)) {
+  if (!is.null(model) && !is.null(dataset)) {
     modelVars <- model[["jaspVars"]][["encoded"]]$predictors
     presentVars <- colnames(dataset)
     ready <- all(modelVars %in% presentVars)
@@ -241,12 +241,25 @@ is.jaspMachineLearning <- function(x) {
 }

 .mlPredictionReadData <- function(dataset, options, model) {
-  dataset <- jaspBase::excludeNaListwise(dataset, options[["predictors"]])
-  if (options[["scaleVariables"]] && length(unlist(options[["predictors"]])) > 0) {
-    dataset <- .scaleNumericData(dataset)
+  if (length(options[["predictors"]]) == 0) {
+    dataset <- NULL
+  } else {
+    dataset <- jaspBase::excludeNaListwise(dataset, options[["predictors"]])
+    if (options[["scaleVariables"]] && length(unlist(options[["predictors"]])) > 0) {
+      dataset <- .scaleNumericData(dataset)
+    }
+    # Select only the predictors in the model to prevent accidental double column names
+    dataset <- dataset[, which(decodeColNames(colnames(dataset)) %in% model[["jaspVars"]][["decoded"]]$predictors)]
+    # Ensure the column names in the dataset match those in the training data
+    colnames(dataset) <- .matchDecodedNames(colnames(dataset), model)
+    # Retrieve the training set
+    trainingSet <- model[["explainer"]]$data
+    # Check for factor levels in the new dataset that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, dataset, "prediction", model)
+    # Ensure factor variables in the dataset have the same levels as those in the training data
+    factorColumns <- colnames(dataset)[sapply(dataset, is.factor)]
+    dataset[factorColumns] <- lapply(factorColumns, function(i) factor(dataset[[i]], levels = levels(trainingSet[[i]])))
   }
-  dataset <- dataset[, which(decodeColNames(colnames(dataset)) %in% model[["jaspVars"]][["decoded"]]$predictors)] # Filter only predictors to prevent accidental double column names
-  colnames(dataset) <- .matchDecodedNames(colnames(dataset), model)
   return(dataset)
 }
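The re-leveling step at the end of .mlPredictionReadData matters because treatment coding is defined by a factor's own levels, so the same value can be encoded differently when the prediction set carries fewer levels than the training set. A minimal sketch (hypothetical data, not part of the patch):

  # With only levels "b" and "c" present, "b" becomes the baseline and the dummy
  # columns no longer line up with the training design matrix.
  trainLevels <- c("a", "b", "c")
  newData <- data.frame(x = factor(c("b", "c")))       # levels "b", "c" only
  model.matrix(~ x, newData)                           # columns: (Intercept), xc
  newData$x <- factor(newData$x, levels = trainLevels) # re-level as in the patch
  model.matrix(~ x, newData)                           # columns: (Intercept), xb, xc
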
diff --git a/R/mlRegressionBoosting.R b/R/mlRegressionBoosting.R
index 23c845c5..38e3b459 100644
--- a/R/mlRegressionBoosting.R
+++ b/R/mlRegressionBoosting.R
@@ -86,6 +86,8 @@ mlRegressionBoosting <- function(jaspResults, dataset, options, ...) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     noOfFolds <- 0
     .mlBoostingCheckMinObsNode(options, trainingSet) # Check for min obs in nodes
     trainingFit <- gbm::gbm(
@@ -101,6 +103,10 @@ mlRegressionBoosting <- function(jaspResults, dataset, options, ...) {
     testSet <- dataset[-trainingIndex, ]
     validationSet <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, validationSet, "validation")
     if (options[["modelValid"]] == "validationManual") {
       noOfFolds <- 0
     } else if (options[["modelValid"]] == "validationKFold") {
diff --git a/R/mlRegressionDecisionTree.R b/R/mlRegressionDecisionTree.R
index 4ba00e9f..31d10b45 100644
--- a/R/mlRegressionDecisionTree.R
+++ b/R/mlRegressionDecisionTree.R
@@ -77,6 +77,8 @@ mlRegressionDecisionTree <- function(jaspResults, dataset, options, state = NULL) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     complexityPenalty <- options[["complexityParameter"]]
     trainingFit <- rpart::rpart(
       formula = formula, data = trainingSet, method = "anova", x = TRUE, y = TRUE,
@@ -89,6 +91,10 @@ mlRegressionDecisionTree <- function(jaspResults, dataset, options, state = NULL) {
     testSet <- dataset[-trainingIndex, ]
     validationSet <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, validationSet, "validation")
     cps <- seq(0, options[["maxComplexityParameter"]], by = 0.01)
     errorStore <- trainErrorStore <- numeric(length(cps))
     startProgressbar(length(cps))
diff --git a/R/mlRegressionKnn.R b/R/mlRegressionKnn.R
index 5312e14b..32828523 100644
--- a/R/mlRegressionKnn.R
+++ b/R/mlRegressionKnn.R
@@ -77,6 +77,8 @@ mlRegressionKnn <- function(jaspResults, dataset, options, state = NULL) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     testFit <- kknn::kknn(
       formula = formula, train = trainingSet, test = testSet, k = options[["noOfNearestNeighbours"]],
       distance = distance, kernel = weights, scale = FALSE
@@ -88,6 +90,10 @@ mlRegressionKnn <- function(jaspResults, dataset, options, state = NULL) {
     testSet <- dataset[-trainingIndex, ]
     validationSet <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, validationSet, "validation")
     if (options[["modelValid"]] == "validationManual") {
       nnRange <- 1:options[["maxNearestNeighbors"]]
       errorStore <- numeric(length(nnRange))
diff --git a/R/mlRegressionLinear.R b/R/mlRegressionLinear.R
index 71b844c0..49f40591 100644
--- a/R/mlRegressionLinear.R
+++ b/R/mlRegressionLinear.R
@@ -68,6 +68,8 @@ mlRegressionLinear <- function(jaspResults, dataset, options, ...) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     if (options[["intercept"]]) {
       formula <- formula(paste(options[["target"]], "~ 1 + ", paste(options[["predictors"]], collapse = " + ")))
     } else {
diff --git a/R/mlRegressionNeuralNetwork.R b/R/mlRegressionNeuralNetwork.R
index b2d3c0d6..906d9ddd 100644
--- a/R/mlRegressionNeuralNetwork.R
+++ b/R/mlRegressionNeuralNetwork.R
@@ -140,6 +140,8 @@ mlRegressionNeuralNetwork <- function(jaspResults, dataset, options, ...) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     # Structure of neural network
     structure <- .getNeuralNetworkStructure(options)
     p <- try({
@@ -167,6 +169,10 @@ mlRegressionNeuralNetwork <- function(jaspResults, dataset, options, ...) {
     testSet <- dataset[-trainingIndex, ]
     valid <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, valid, "validation")
     errorStore <- numeric(options[["maxGenerations"]])
     trainErrorStore <- numeric(options[["maxGenerations"]])
     # For plotting
diff --git a/R/mlRegressionRandomForest.R b/R/mlRegressionRandomForest.R
index 7f02eaba..f599455d 100644
--- a/R/mlRegressionRandomForest.R
+++ b/R/mlRegressionRandomForest.R
@@ -80,6 +80,8 @@ mlRegressionRandomForest <- function(jaspResults, dataset, options, ...) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     testFit <- randomForest::randomForest(
       x = trainingSet[, options[["predictors"]]], y = trainingSet[, options[["target"]]],
       xtest = testSet[, options[["predictors"]]], ytest = testSet[, options[["target"]]],
@@ -94,6 +96,10 @@ mlRegressionRandomForest <- function(jaspResults, dataset, options, ...) {
     testSet <- dataset[-trainingIndex, ]
     validationSet <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, validationSet, "validation")
     validationFit <- randomForest::randomForest(
       x = trainingSet[, options[["predictors"]]], y = trainingSet[, options[["target"]]],
       xtest = validationSet[, options[["predictors"]]], ytest = validationSet[, options[["target"]]],
diff --git a/R/mlRegressionRegularized.R b/R/mlRegressionRegularized.R
index b02575ec..2c9f0b61 100644
--- a/R/mlRegressionRegularized.R
+++ b/R/mlRegressionRegularized.R
@@ -87,6 +87,8 @@ mlRegressionRegularized <- function(jaspResults, dataset, options, ...) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     trainingFit <- glmnet::cv.glmnet(
       x = as.matrix(trainingSet[, options[["predictors"]]]), y = trainingSet[, options[["target"]]],
       nfolds = 10, type.measure = "deviance",
@@ -106,6 +108,10 @@ mlRegressionRegularized <- function(jaspResults, dataset, options, ...) {
     testSet <- dataset[-trainingIndex, ]
     validationSet <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, validationSet, "validation")
     trainingWeights <- weights[trainingIndex]
     trainingFit <- glmnet::cv.glmnet(
       x = as.matrix(trainingSet[, options[["predictors"]]]), y = trainingSet[, options[["target"]]],
diff --git a/R/mlRegressionSvm.R b/R/mlRegressionSvm.R
index 19e5480a..7a634524 100644
--- a/R/mlRegressionSvm.R
+++ b/R/mlRegressionSvm.R
@@ -74,6 +74,8 @@ mlRegressionSvm <- function(jaspResults, dataset, options, state = NULL) {
     # Just create a train and a test set (no optimization)
     trainingSet <- trainingAndValidationSet
     testSet <- dataset[-trainingIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
     cost <- options[["cost"]]
     trainingFit <- e1071::svm(
       formula = formula, data = trainingSet, type = "eps-regression", kernel = options[["weights"]], cost = cost, tolerance = options[["tolerance"]],
@@ -85,6 +87,10 @@ mlRegressionSvm <- function(jaspResults, dataset, options, state = NULL) {
     testSet <- dataset[-trainingIndex, ]
     validationSet <- trainingAndValidationSet[validationIndex, ]
     trainingSet <- trainingAndValidationSet[-validationIndex, ]
+    # Check for factor levels in the test set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, testSet, "test")
+    # Check for factor levels in the validation set that are not in the training set
+    .checkForNewFactorLevelsInPredictionSet(trainingSet, validationSet, "validation")
     costs <- seq(0.01, options[["maxCost"]], 0.01)
     errorStore <- trainErrorStore <- numeric(length(costs))
     startProgressbar(length(costs))
diff --git a/inst/qml/common/ui/DataSplit.qml b/inst/qml/common/ui/DataSplit.qml
index 80a7f9a8..4f65ddcf 100644
--- a/inst/qml/common/ui/DataSplit.qml
+++ b/inst/qml/common/ui/DataSplit.qml
@@ -84,7 +84,7 @@ Section
 			name:				"testSetIndicator"
 			label:				qsTr("Test set indicator")
 			childrenOnSameRow:	true
-			info:				qsTr("Use an indicator variable to select data for the test set. This indicator should be a column in your data that consists of only 0 (excluded from the test set) and 1 (included in the test set). The data will then be split into a training (and validation if requested) set (0), and a test set (1) according to your indicator.")
+			info:				qsTr("Use an indicator variable to select data for the test set. This indicator should be a column of type scale in your data that consists of only 0 (excluded from the test set) and 1 (included in the test set). The data will then be split into a training (and validation if requested) set (0), and a test set (1) according to your indicator.")

 			DropDown
 			{
@@ -94,6 +94,7 @@ Section
 				addEmptyValue:		true
 				placeholderText:	qsTr("None")
 				info:				qsTr("The variable in the data set that is used as the test set indicator.")
+				allowedColumns:		"scale"
 			}
 		}
 	}
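For context, a minimal sketch (hypothetical data and column name, not part of the patch) of the split that the scale-type 0/1 indicator described in the info text produces:

  # Rows with indicator 1 go to the test set; rows with 0 go to the training
  # (and, if requested, validation) set.
  dataset <- data.frame(y = rnorm(6), x = rnorm(6), testSetIndicator = c(0, 0, 1, 0, 1, 0))
  trainingSet <- dataset[dataset$testSetIndicator == 0, ]
  testSet <- dataset[dataset$testSetIndicator == 1, ]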