diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index daafcc1b..3e7a01e2 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -455,6 +455,9 @@ plot$dependOn(options = c(.mlClassificationDependencies(options), "decisionBoundary", "pointsShown", "legendShown")) jaspResults[["decisionBoundary"]] <- plot if (!ready || length(options[["predictors"]]) < 2) { + if (length(options[["predictors"]]) == 1) { + plot$setError(gettext("Cannot create plot: You need at least two (numeric) features to create the decision boundary matrix. You have currently included only one feature.")) + } return() } .classificationFillDecisionBoundary(dataset, options, jaspResults, plot, type) @@ -520,9 +523,9 @@ x_max <- xBreaks[length(xBreaks)] y_min <- yBreaks[1] y_max <- yBreaks[length(yBreaks)] - # Adjust the graining - hs <- min(c(diff(range(xBreaks)), diff(range(yBreaks)))) / 50 - grid <- as.data.frame(expand.grid(seq(x_min, x_max, by = hs), seq(y_min, y_max, by = hs))) + xseq <- seq(x_min, x_max, length.out = 100) + yseq <- seq(y_min, y_max, length.out = 100) + grid <- as.data.frame(expand.grid(xseq, yseq)) colnames(grid) <- colnames(predictors) classificationResult <- jaspResults[["classificationResult"]]$object if (type == "lda") { diff --git a/R/commonMachineLearningClustering.R b/R/commonMachineLearningClustering.R index 735ce775..b646a2c3 100644 --- a/R/commonMachineLearningClustering.R +++ b/R/commonMachineLearningClustering.R @@ -476,7 +476,7 @@ table$setData(clusterMeans) } -.mlClusteringPlotDensities <- function(dataset, options, jaspResults, ready, position) { +.mlClusteringPlotDensities <- function(dataset, options, jaspResults, ready, position, type) { if (!is.null(jaspResults[["clusterDensities"]]) || !options[["clusterDensityPlot"]]) { return() } @@ -488,9 +488,15 @@ return() } clusterResult <- jaspResults[["clusterResult"]]$object + predictions <- clusterResult[["pred.values"]] + ncolors <- clusterResult[["clusters"]] + if (type == "densitybased") { + ncolors <- ncolors + 1 + predictions[predictions == 0] <- gettext("Noisepoint") + } + clusters <- as.factor(predictions) if (!options[["clusterDensityPlotSingleFigure"]]) { for (variable in unlist(options[["predictors"]])) { - clusters <- as.factor(clusterResult[["pred.values"]]) xBreaks <- jaspGraphs::getPrettyAxisBreaks(dataset[[variable]], min.n = 4) plotData <- data.frame( cluster = clusters, @@ -500,7 +506,7 @@ ggplot2::geom_density(mapping = ggplot2::aes(fill = cluster), color = "black", alpha = 0.6) + ggplot2::scale_x_continuous(name = variable, breaks = xBreaks, limits = range(xBreaks)) + ggplot2::scale_y_continuous(name = gettext("Density")) + - ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(length(levels(clusters)))) + + ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(ncolors)) + jaspGraphs::geom_rangeframe() + jaspGraphs::themeJaspRaw(legend.position = "right") + ggplot2::theme(axis.ticks.y = ggplot2::element_blank(), axis.text.y = ggplot2::element_blank()) @@ -509,11 +515,11 @@ } } else { dataList <- c(dataset[, options[["predictors"]]]) - plotData <- data.frame(value = unlist(dataList), variable = rep(options[["predictors"]], lengths(dataList)), cluster = rep(clusterResult[["pred.values"]], length(options[["predictors"]]))) + plotData <- data.frame(value = unlist(dataList), variable = rep(options[["predictors"]], lengths(dataList)), cluster = rep(predictions, length(options[["predictors"]]))) xBreaks <- jaspGraphs::getPrettyAxisBreaks(plotData[["value"]]) p <- ggplot2::ggplot(data = plotData, mapping = ggplot2::aes(x = value, y = factor(variable), height = ..density.., fill = factor(cluster))) + ggridges::geom_density_ridges(stat = "density", alpha = .6) + - ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(length(unique(clusterResult[["pred.values"]])))) + + ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(ncolors)) + ggplot2::scale_x_continuous(name = gettext("Value"), breaks = xBreaks, limits = range(xBreaks)) + ggplot2::scale_y_discrete(name = gettext("Feature")) + jaspGraphs::geom_rangeframe(sides = "b") + @@ -523,7 +529,7 @@ } } -.mlClusteringPlotMeans <- function(dataset, options, jaspResults, ready, position) { +.mlClusteringPlotMeans <- function(dataset, options, jaspResults, ready, position, type) { if (!is.null(jaspResults[["clusterMeans"]]) || !options[["clusterMeanPlot"]]) { return() } @@ -536,8 +542,14 @@ } clusterDataset <- data.frame(dataset[, options[["predictors"]], drop = FALSE]) clusterResult <- jaspResults[["clusterResult"]]$object + predictions <- clusterResult[["pred.values"]] + ncolors <- clusterResult[["clusters"]] + if (type == "densitybased") { + ncolors <- ncolors + 1 + predictions[predictions == 0] <- gettext("Noisepoint") + } + clusters <- as.factor(predictions) if (options[["clusterMeanPlotSingleFigure"]]) { - clusters <- as.factor(clusterResult[["pred.values"]]) xBreaks <- c(1, (as.numeric(levels(clusters)) + 1) * length(options[["predictors"]])) clusterMeansData <- aggregate(clusterDataset, list(clusters), mean) clusterSdData <- aggregate(clusterDataset, list(clusters), sd) @@ -578,14 +590,13 @@ } p <- p + ggplot2::scale_x_continuous(name = NULL, breaks = xBreaks, labels = xLabels) + ggplot2::scale_y_continuous(name = gettext("Cluster Mean"), breaks = yBreaks, limits = range(yBreaks)) + - ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(length(unique(clusterResult[["pred.values"]])))) + + ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(ncolors)) + jaspGraphs::geom_rangeframe(sides = "l") + jaspGraphs::themeJaspRaw(legend.position = "right") + ggplot2::theme(axis.ticks.x = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(angle = 20)) plot[["oneFigure"]] <- createJaspPlot(plot = p, title = gettext("All Features"), height = 400, width = 200 * length(options[["predictors"]])) } else { for (variable in unlist(options[["predictors"]])) { - clusters <- as.factor(clusterResult[["pred.values"]]) xBreaks <- as.numeric(levels(clusters)) clusterMeansData <- aggregate(clusterDataset[[variable]], list(clusters), mean) clusterSdData <- aggregate(clusterDataset[[variable]], list(clusters), sd) @@ -608,7 +619,7 @@ } p <- p + ggplot2::scale_x_discrete(name = gettext("Cluster"), breaks = xBreaks) + ggplot2::scale_y_continuous(name = variable, breaks = yBreaks, limits = range(yBreaks)) + - ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(length(unique(clusterResult[["pred.values"]])))) + + ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(ncolors)) + jaspGraphs::geom_rangeframe(sides = "l") + jaspGraphs::themeJaspRaw() + ggplot2::theme(axis.ticks.x = ggplot2::element_blank()) @@ -634,7 +645,7 @@ unlist(regmatches(p[[1]], gregexpr("[[:digit:]]+\\.*[[:digit:]]*", p[[1]]))) } -.mlClusteringMatrixPlot <- function(dataset, options, jaspResults, ready, position) { +.mlClusteringMatrixPlot <- function(dataset, options, jaspResults, ready, position, type) { if (!is.null(jaspResults[["matrixPlot"]]) || !options[["matrixPlot"]]) { return() } @@ -667,17 +678,23 @@ oldFontSize <- jaspGraphs::getGraphOption("fontsize") jaspGraphs::setGraphOption("fontsize", .85 * oldFontSize) startProgressbar(length(plotMat) + 1) + ncolors <- clusterResult[["clusters"]] + predictions <- clusterResult[["pred.values"]] + if (type == "densitybased") { + ncolors <- ncolors + 1 + predictions[predictions == 0] <- gettext("Noisepoint") + } for (row in 2:l) { for (col in 1:(l - 1)) { if (col < row) { predictors <- dataset[, variables] predictors <- predictors[, c(col, row)] - plotData <- data.frame(x = predictors[, 1], y = predictors[, 2], cluster = as.factor(clusterResult[["pred.values"]])) + plotData <- data.frame(x = predictors[, 1], y = predictors[, 2], cluster = as.factor(predictions)) xBreaks <- jaspGraphs::getPrettyAxisBreaks(plotData$x, min.n = 4) yBreaks <- jaspGraphs::getPrettyAxisBreaks(plotData$y, min.n = 4) p <- ggplot2::ggplot(data = plotData, mapping = ggplot2::aes(x = x, y = y, fill = cluster)) + jaspGraphs::geom_point() + - ggplot2::scale_fill_manual(name = NULL, values = .mlColorScheme(clusterResult[["clusters"]])) + + ggplot2::scale_fill_manual(name = NULL, values = .mlColorScheme(ncolors)) + ggplot2::scale_x_continuous(name = NULL, breaks = xBreaks, limits = range(xBreaks)) + ggplot2::scale_y_continuous(name = NULL, breaks = yBreaks, limits = range(yBreaks)) + jaspGraphs::geom_rangeframe() + @@ -704,7 +721,7 @@ y <- sqrt(lambda1) * sin(theta) * cos(t) + sqrt(lambda2) * cos(theta) * sin(t) + mu_y ellips <- data.frame(x = x, y = y) p <- p + ggplot2::geom_path(data = ellips, mapping = ggplot2::aes(x = x, y = y), color = "black", inherit.aes = FALSE, linewidth = 1.5) + - ggplot2::geom_path(data = ellips, mapping = ggplot2::aes(x = x, y = y), color = .mlColorScheme(clusterResult[["clusters"]])[i], inherit.aes = FALSE, linewidth = 0.75) + ggplot2::geom_path(data = ellips, mapping = ggplot2::aes(x = x, y = y), color = .mlColorScheme(ncolors)[i], inherit.aes = FALSE, linewidth = 0.75) } } @@ -712,13 +729,13 @@ } if (l > 2) { predictors <- dataset[, options[["predictors"]]] - plotData <- data.frame(cluster = as.factor(clusterResult[["pred.values"]]), predictor = predictors[, 1]) + plotData <- data.frame(cluster = as.factor(predictions), predictor = predictors[, 1]) p <- ggplot2::ggplot(plotData, ggplot2::aes(y = cluster, x = cluster, show.legend = TRUE)) + jaspGraphs::geom_point(ggplot2::aes(fill = cluster), alpha = 0) + ggplot2::xlab(NULL) + ggplot2::ylab(NULL) + ggplot2::theme(legend.key = ggplot2::element_blank()) + - ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(clusterResult[["clusters"]])) + + ggplot2::scale_fill_manual(name = gettext("Cluster"), values = .mlColorScheme(ncolors)) + jaspGraphs::geom_rangeframe(sides = "") + jaspGraphs::themeJaspRaw(legend.position = "left") + ggplot2::theme(axis.ticks = ggplot2::element_blank(), axis.text.x = ggplot2::element_blank(), axis.text.y = ggplot2::element_blank()) + diff --git a/R/mlClassificationLda.R b/R/mlClassificationLda.R index 5d1f27ca..79a52db1 100644 --- a/R/mlClassificationLda.R +++ b/R/mlClassificationLda.R @@ -509,7 +509,13 @@ mlClassificationLda <- function(jaspResults, dataset, options, ...) { if (!ready) { return() } - result <- mvnormalTest::mardia(dataset[, options[["predictors"]]]) + p <- try({ + result <- mvnormalTest::mardia(dataset[, options[["predictors"]]]) + }) + if (isTryError(p)) { # Fail gracefully + table$setError(gettextf("An error occurred when creating this table: %s", jaspBase:::.extractErrorMessage(p))) + return() + } table[["statistic"]] <- as.numeric(as.character(result[["mv.test"]][1:2, "Statistic"])) table[["p"]] <- as.numeric(as.character(result[["mv.test"]][1:2, "p-value"])) } diff --git a/R/mlClusteringDensityBased.R b/R/mlClusteringDensityBased.R index 681bacd5..aa665d4f 100644 --- a/R/mlClusteringDensityBased.R +++ b/R/mlClusteringDensityBased.R @@ -46,13 +46,13 @@ mlClusteringDensityBased <- function(jaspResults, dataset, options, ...) { .mlClusteringPlotTsne(dataset, options, jaspResults, ready, position = 6, type = "densitybased") # Create the matrix plot - .mlClusteringMatrixPlot(dataset, options, jaspResults, ready, position = 7) + .mlClusteringMatrixPlot(dataset, options, jaspResults, ready, position = 7, type = "densitybased") # Create the cluster means plot - .mlClusteringPlotMeans(dataset, options, jaspResults, ready, position = 8) + .mlClusteringPlotMeans(dataset, options, jaspResults, ready, position = 8, type = "densitybased") # Create the cluster densities plot - .mlClusteringPlotDensities(dataset, options, jaspResults, ready, position = 9) + .mlClusteringPlotDensities(dataset, options, jaspResults, ready, position = 9, type = "densitybased") } .densityBasedClustering <- function(dataset, options, jaspResults) { diff --git a/R/mlClusteringFuzzyCMeans.R b/R/mlClusteringFuzzyCMeans.R index 63ce7c27..46c53483 100644 --- a/R/mlClusteringFuzzyCMeans.R +++ b/R/mlClusteringFuzzyCMeans.R @@ -46,13 +46,13 @@ mlClusteringFuzzyCMeans <- function(jaspResults, dataset, options, ...) { .mlClusteringPlotTsne(dataset, options, jaspResults, ready, position = 6, type = "cmeans") # Create the matrix plot - .mlClusteringMatrixPlot(dataset, options, jaspResults, ready, position = 7) + .mlClusteringMatrixPlot(dataset, options, jaspResults, ready, position = 7, type = "cmeans") # Create the cluster means plot - .mlClusteringPlotMeans(dataset, options, jaspResults, ready, position = 8) + .mlClusteringPlotMeans(dataset, options, jaspResults, ready, position = 8, type = "cmeans") # Create the cluster densities plot - .mlClusteringPlotDensities(dataset, options, jaspResults, ready, position = 9) + .mlClusteringPlotDensities(dataset, options, jaspResults, ready, position = 9, type = "cmeans") } .cMeansClustering <- function(dataset, options, jaspResults, ready) { diff --git a/R/mlClusteringHierarchical.R b/R/mlClusteringHierarchical.R index 568e1dd0..78b45f20 100644 --- a/R/mlClusteringHierarchical.R +++ b/R/mlClusteringHierarchical.R @@ -46,13 +46,13 @@ mlClusteringHierarchical <- function(jaspResults, dataset, options, ...) { .mlClusteringPlotTsne(dataset, options, jaspResults, ready, position = 6, type = "hierarchical") # Create the matrix plot - .mlClusteringMatrixPlot(dataset, options, jaspResults, ready, position = 7) + .mlClusteringMatrixPlot(dataset, options, jaspResults, ready, position = 7, type = "hierarchical") # Create the cluster means plot - .mlClusteringPlotMeans(dataset, options, jaspResults, ready, position = 8) + .mlClusteringPlotMeans(dataset, options, jaspResults, ready, position = 8, type = "hierarchical") # Create the cluster densities plot - .mlClusteringPlotDensities(dataset, options, jaspResults, ready, position = 9) + .mlClusteringPlotDensities(dataset, options, jaspResults, ready, position = 9, type = "hierarchical") # Create dendrogram .mlClusteringHierarchicalDendogram(dataset, options, jaspResults, ready, position = 10) diff --git a/R/mlClusteringKMeans.R b/R/mlClusteringKMeans.R index d55190d5..40b7c23c 100644 --- a/R/mlClusteringKMeans.R +++ b/R/mlClusteringKMeans.R @@ -46,13 +46,13 @@ mlClusteringKMeans <- function(jaspResults, dataset, options, ...) { .mlClusteringPlotTsne(dataset, options, jaspResults, ready, position = 6, type = "kmeans") # Create the matrix plot - .mlClusteringMatrixPlot(dataset, options, jaspResults, ready, position = 7) + .mlClusteringMatrixPlot(dataset, options, jaspResults, ready, position = 7, type = "kmeans") # Create the cluster means plot - .mlClusteringPlotMeans(dataset, options, jaspResults, ready, position = 8) + .mlClusteringPlotMeans(dataset, options, jaspResults, ready, position = 8, type = "kmeans") # Create the cluster densities plot - .mlClusteringPlotDensities(dataset, options, jaspResults, ready, position = 9) + .mlClusteringPlotDensities(dataset, options, jaspResults, ready, position = 9, type = "kmeans") } .kMeansClustering <- function(dataset, options, jaspResults, ready) { diff --git a/R/mlClusteringModelBased.R b/R/mlClusteringModelBased.R index 11777c40..0947ff31 100644 --- a/R/mlClusteringModelBased.R +++ b/R/mlClusteringModelBased.R @@ -49,13 +49,13 @@ mlClusteringModelBased <- function(jaspResults, dataset, options, ...) { .mlClusteringPlotTsne(dataset, options, jaspResults, ready, position = 7, type = "modelbased") # Create the matrix plot - .mlClusteringMatrixPlot(dataset, options, jaspResults, ready, position = 8) + .mlClusteringMatrixPlot(dataset, options, jaspResults, ready, position = 8, type = "modelbased") # Create the cluster means plot - .mlClusteringPlotMeans(dataset, options, jaspResults, ready, position = 9) + .mlClusteringPlotMeans(dataset, options, jaspResults, ready, position = 9, type = "modelbased") # Create the cluster densities plot - .mlClusteringPlotDensities(dataset, options, jaspResults, ready, position = 10) + .mlClusteringPlotDensities(dataset, options, jaspResults, ready, position = 10, type = "modelbased") } emControl <- mclust::emControl diff --git a/R/mlClusteringRandomForest.R b/R/mlClusteringRandomForest.R index 62a40b20..b0a0a14f 100644 --- a/R/mlClusteringRandomForest.R +++ b/R/mlClusteringRandomForest.R @@ -49,13 +49,13 @@ mlClusteringRandomForest <- function(jaspResults, dataset, options, ...) { .mlClusteringPlotTsne(dataset, options, jaspResults, ready, position = 7, type = "randomForest") # Create the matrix plot - .mlClusteringMatrixPlot(dataset, options, jaspResults, ready, position = 8) + .mlClusteringMatrixPlot(dataset, options, jaspResults, ready, position = 8, type = "randomForest") # Create the cluster means plot - .mlClusteringPlotMeans(dataset, options, jaspResults, ready, position = 9) + .mlClusteringPlotMeans(dataset, options, jaspResults, ready, position = 9, type = "randomForest") # Create the cluster densities plot - .mlClusteringPlotDensities(dataset, options, jaspResults, ready, position = 10) + .mlClusteringPlotDensities(dataset, options, jaspResults, ready, position = 10, type = "randomForest") } .randomForestClustering <- function(dataset, options, jaspResults) { diff --git a/inst/qml/common/tables/ModelPerformance.qml b/inst/qml/common/tables/ModelPerformance.qml index a0f12f00..3b1e2621 100644 --- a/inst/qml/common/tables/ModelPerformance.qml +++ b/inst/qml/common/tables/ModelPerformance.qml @@ -24,5 +24,5 @@ CheckBox { name: "validationMeasures" text: qsTr("Model performance") - info: qsTr("Displays available model performance metrics. For regression, these metrics include mean squared error (MSE), root mean squared error (RMSE), R2 and more. For classification, these metrics include precision, recall, the F1-score, support, AUC (area under the ROC curve) and more.") + info: qsTr("Displays available model performance metrics. For regression, these metrics include mean squared error (MSE), root mean squared error (RMSE), R2 and more. For classification, these metrics include precision, recall, the F1-score, support, AUC (area under the ROC curve) and more. For clustering, these metrics include entropy, Dunn index and more.") } diff --git a/tests/testthat/_snaps/mlclusteringdensitybased/all-predictors.svg b/tests/testthat/_snaps/mlclusteringdensitybased/all-predictors.svg index a5ab6bae..f6ab1cce 100644 --- a/tests/testthat/_snaps/mlclusteringdensitybased/all-predictors.svg +++ b/tests/testthat/_snaps/mlclusteringdensitybased/all-predictors.svg @@ -18,277 +18,277 @@ - + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + @@ -302,39 +302,39 @@ - -Alcohol -Malic -Ash -Alcalinity -Magnesium -Phenols -Flavanoids -Nonflavanoids -Proanthocyanins -Color -Hue -Dilution -Proline + +Alcohol +Malic +Ash +Alcalinity +Magnesium +Phenols +Flavanoids +Nonflavanoids +Proanthocyanins +Color +Hue +Dilution +Proline Cluster Mean - - -Cluster - - - - - - - - - - -0 -1 -2 -3 -4 -all-predictors + + +Cluster + + + + + + + + + + +1 +2 +3 +4 +Noisepoint +all-predictors