|
32 | 32 | "mutationMethod", "survivalMethod", "elitismProportion", "candidates", # Neural network
|
33 | 33 | "noOfTrees", "maxTrees", "baggingFraction", "noOfPredictors", "numberOfPredictors", # Random forest
|
34 | 34 | "complexityParameter", "degree", "gamma", "cost", "tolerance", "epsilon", "maxCost", # Support vector machine
|
35 |
| - "smoothingParameter" # Naive Bayes |
| 35 | + "smoothingParameter", # Naive Bayes |
| 36 | + "intercept", "link" # Logistic |
36 | 37 | )
|
37 | 38 | if (includeSaveOptions) {
|
38 | 39 | opt <- c(opt, "saveModel", "savePath")
|
|
62 | 63 | if (type == "lda" || type == "randomForest" || type == "boosting") {
|
63 | 64 | # Require at least 2 features
|
64 | 65 | ready <- length(options[["predictors"]][options[["predictors"]] != ""]) >= 2 && options[["target"]] != ""
|
65 |
| - } else if (type == "knn" || type == "neuralnet" || type == "rpart" || type == "svm" || type == "naivebayes") { |
| 66 | + } else if (type == "knn" || type == "neuralnet" || type == "rpart" || type == "svm" || type == "naivebayes" || type == "logistic") { |
66 | 67 | # Require at least 1 features
|
67 | 68 | ready <- length(options[["predictors"]][options[["predictors"]] != ""]) >= 1 && options[["target"]] != ""
|
68 | 69 | }
|
|
93 | 94 | "neuralnet" = .neuralnetClassification(dataset, options, jaspResults),
|
94 | 95 | "rpart" = .decisionTreeClassification(dataset, options, jaspResults),
|
95 | 96 | "svm" = .svmClassification(dataset, options, jaspResults),
|
96 |
| - "naivebayes" = .naiveBayesClassification(dataset, options, jaspResults) |
| 97 | + "naivebayes" = .naiveBayesClassification(dataset, options, jaspResults), |
| 98 | + "logistic" = .logisticMultinomialClassification(dataset, options, jaspResults) |
97 | 99 | )
|
98 | 100 | })
|
99 | 101 | if (isTryError(p)) { # Fail gracefully
|
|
116 | 118 | "neuralnet" = gettext("Neural Network Classification"),
|
117 | 119 | "rpart" = gettext("Decision Tree Classification"),
|
118 | 120 | "svm" = gettext("Support Vector Machine Classification"),
|
119 |
| - "naivebayes" = gettext("Naive Bayes Classification") |
| 121 | + "naivebayes" = gettext("Naive Bayes Classification"), |
| 122 | + "logistic" = gettext("Logistic / Multinomial Regression Classification") |
120 | 123 | )
|
121 | 124 | tableTitle <- gettextf("Model Summary: %1$s", title)
|
122 | 125 | table <- createJaspTable(tableTitle)
|
|
147 | 150 | table$addColumnInfo(name = "vectors", title = gettext("Support Vectors"), type = "integer")
|
148 | 151 | } else if (type == "naivebayes") {
|
149 | 152 | table$addColumnInfo(name = "smoothing", title = gettext("Smoothing"), type = "number")
|
| 153 | + } else if (type == "logistic") { |
| 154 | + table$addColumnInfo(name = "family", title = gettext("Family"), type = "string") |
| 155 | + table$addColumnInfo(name = "link", title = gettext("Link"), type = "string") |
150 | 156 | }
|
151 | 157 | # Add common columns
|
152 | 158 | table$addColumnInfo(name = "nTrain", title = gettext("n(Train)"), type = "integer")
|
|
164 | 170 | }
|
165 | 171 | # If no analysis is run, specify the required variables in a footnote
|
166 | 172 | if (!ready) {
|
167 |
| - table$addFootnote(gettextf("Please provide a target variable and at least %i feature variable(s).", if (type == "knn" || type == "neuralnet" || type == "rpart" || type == "svm") 1L else 2L)) |
| 173 | + table$addFootnote(gettextf("Please provide a target variable and at least %i feature variable(s).", if (type == "knn" || type == "neuralnet" || type == "rpart" || type == "svm" || type == "logistic") 1L else 2L)) |
168 | 174 | }
|
169 | 175 | if (options[["savePath"]] != "") {
|
170 | 176 | validNames <- (length(grep(" ", decodeColNames(colnames(dataset)))) == 0) && (length(grep("_", decodeColNames(colnames(dataset)))) == 0)
|
|
312 | 318 | testAcc = classificationResult[["testAcc"]]
|
313 | 319 | )
|
314 | 320 | table$addRows(row)
|
| 321 | + } else if (type == "logistic") { |
| 322 | + if (classificationResult[["family"]] == "binomial") { |
| 323 | + table$title <- gettext("Model Summary: Logistic Regression Classification") |
| 324 | + } else { |
| 325 | + table$title <- gettext("Model Summary: Multinomial Regression Classification") |
| 326 | + } |
| 327 | + family <- classificationResult[["family"]] |
| 328 | + link <- classificationResult[["link"]] |
| 329 | + row <- data.frame( |
| 330 | + family = paste0(toupper(substr(family, 1, 1)), substr(family, 2, nchar(family))), |
| 331 | + link = paste0(toupper(substr(link, 1, 1)), substr(link, 2, nchar(link))), |
| 332 | + nTrain = nTrain, |
| 333 | + nTest = classificationResult[["ntest"]], |
| 334 | + testAcc = classificationResult[["testAcc"]] |
| 335 | + ) |
| 336 | + table$addRows(row) |
315 | 337 | }
|
316 | 338 | # Save the applied model if requested
|
317 | 339 | if (options[["saveModel"]] && options[["savePath"]] != "") {
|
|
564 | 586 | fit <- e1071::naiveBayes(formula, data = dataset, laplace = options[["smoothingParameter"]])
|
565 | 587 | predictions <- as.factor(max.col(predict(fit, newdata = grid, type = "raw")))
|
566 | 588 | levels(predictions) <- unique(dataset[, options[["target"]]])
|
| 589 | + } else if (type == "logistic") { |
| 590 | + if (classificationResult[["family"]] == "binomial") { |
| 591 | + fit <- stats::glm(formula, data = dataset, family = stats::binomial(link = options[["link"]])) |
| 592 | + predictions <- as.factor(round(predict(fit, grid, type = "response"), 0)) |
| 593 | + levels(predictions) <- unique(dataset[, options[["target"]]]) |
| 594 | + } else { |
| 595 | + fit <- VGAM::vglm(formula, data = dataset, family = VGAM::multinomial()) |
| 596 | + logodds <- predict(fit, newdata = grid) |
| 597 | + ncategories <- ncol(logodds) + 1 |
| 598 | + probabilities <- matrix(0, nrow = nrow(logodds), ncol = ncategories) |
| 599 | + for (i in seq_len(ncategories - 1)) { |
| 600 | + probabilities[, i] <- exp(logodds[, i]) |
| 601 | + } |
| 602 | + probabilities[, ncategories] <- 1 |
| 603 | + row_sums <- rowSums(probabilities) |
| 604 | + probabilities <- probabilities / row_sums |
| 605 | + predicted_columns <- apply(probabilities, 1, which.max) |
| 606 | + categories <- levels(dataset[[options[["target"]]]]) |
| 607 | + predictions <- as.factor(categories[predicted_columns]) |
| 608 | + } |
567 | 609 | }
|
568 | 610 | shapes <- rep(21, nrow(dataset))
|
569 | 611 | if (type == "svm") {
|
|
703 | 745 | } else if (type == "naivebayes") {
|
704 | 746 | fit <- e1071::naiveBayes(formula = formula, data = typeData, laplace = options[["smoothingParameter"]])
|
705 | 747 | score <- max.col(predict(fit, test, type = "raw"))
|
| 748 | + } else if (type == "logistic") { |
| 749 | + fit <- stats::glm(formula, data = typeData, family = stats::binomial(link = options[["link"]])) |
| 750 | + score <- round(predict(fit, test, type = "response"), 0) |
706 | 751 | }
|
707 | 752 | pred <- ROCR::prediction(score, actual.class)
|
708 | 753 | nbperf <- ROCR::performance(pred, "tpr", "fpr")
|
|
1120 | 1165 | score <- max.col(predict(fit, test, type = "raw"))
|
1121 | 1166 | return(score)
|
1122 | 1167 | }
|
| 1168 | + |
# AUC score helper for the logistic (binomial) classification type.
#
# Fits a binomial GLM on `typeData` using the user-selected link function
# (`options[["link"]]`) and returns the test-set predictions rounded to
# hard 0/1 class labels, mirroring the discrete scores produced by the
# sibling .calcAUCScore.* methods.
#
# NOTE(review): the rounded scores are later fed to ROCR::prediction, which
# yields a coarse ROC curve; presumably this matches the other classifiers'
# discrete scoring on purpose — confirm before switching to raw probabilities.
#
# @param AUCformula  Model formula for the one-vs-rest fit.
# @param test        Data frame of test observations to score.
# @param typeData    Training data used to fit the model.
# @param options     Analysis options list; only `link` is read here.
# @param jaspResults Unused; kept for a uniform method signature.
# @param ...         Unused; absorbs extra arguments from the dispatcher.
.calcAUCScore.logisticClassification <- function(AUCformula, test, typeData, options, jaspResults, ...) {
  glmFit <- stats::glm(AUCformula, family = stats::binomial(link = options[["link"]]), data = typeData)
  round(predict(glmFit, test, type = "response"), 0)
}
0 commit comments