From 09f1fac9d9df16d1b666af584e151519d5b15713 Mon Sep 17 00:00:00 2001 From: Johnny Date: Mon, 22 Sep 2025 19:27:33 +0200 Subject: [PATCH 1/3] working version of stepwise regression --- R/doeAnalysis.R | 136 ++++++++++++++++++++++++++++----------- inst/qml/doeAnalysis.qml | 13 ++++ 2 files changed, 113 insertions(+), 36 deletions(-) diff --git a/R/doeAnalysis.R b/R/doeAnalysis.R index de109348..8de2bba1 100644 --- a/R/doeAnalysis.R +++ b/R/doeAnalysis.R @@ -70,6 +70,7 @@ doeAnalysis <- function(jaspResults, dataset, options, ...) { .doeAnalysisAnovaTable(jaspResults, dependent, options, ready, coded) .doeAnalysisCoefficientsTable(jaspResults, dependent, options, ready, coded) .doeAnalysisEquationTable(jaspResults, dependent, options, ready, coded) + .doeAnalysisStepwiseTable(jaspResults, dependent, options, ready, coded) .doeAnalysisPlotPareto(jaspResults, dependent, options, blocks, covariates, ready) .doeAnalysisPlotEffectNormalDistribution(jaspResults, dependent, options, blocks, covariates, ready) .doeAnalysisPlotQQResiduals(jaspResults, dependent, options, ready) @@ -120,7 +121,7 @@ doeAnalysis <- function(jaspResults, dataset, options, ...) { "highestOrder", "order", "continuousFactorsFactorial", "modelTerms", "blocksFactorial", "designType", "continuousFactorsResponseSurface", "codeFactors", "rsmPredefinedModel", "fixedFactorsFactorial", "rsmPredefinedTerms", "dependentFactorial", "covariates", "codeFactorsMethod", "codeFactorsManualTable", - "squaredTerms", "sumOfSquaresType", "squaredTermsCoded") + "squaredTerms", "sumOfSquaresType", "squaredTermsCoded", "stepwiseMethod") return(deps) } @@ -252,8 +253,45 @@ doeAnalysis <- function(jaspResults, dataset, options, ...) { covariateString <- paste0(" + ", unlist(covariates), collapse = "") formulaString <- paste0(formulaString, covariateString) } + formula <- as.formula(formulaString) + if (options[["stepwiseMethod"]] != "enter") { + + stepwiseData <- if (options[["codeFactors"]]) datasetCoded else dataset + fullModel <- lm(formula, data = stepwiseData) + + if (options[["stepwiseMethod"]] == "forward") { + initialFormula <- as.formula(paste(dep, "~ 1")) + } else { + initialFormula <- formula + } + + stepwiseModel <- step(lm(initialFormula, data = stepwiseData), + direction = options[["stepwiseMethod"]], + trace = 0, + scope = formula, + keep = function(model, aic) + list(model = model, + aic = aic, + coefficients = coef(model))) + + excludedVars <- setdiff(names(fullModel$coefficients), names(stepwiseModel$coefficients)) + formula <- as.formula(stepwiseModel) + + # Make data frame from stepwise results, with step number, AIC of that model, and formula + stepwiseResult <- data.frame( + stepNr = 1:ncol(stepwiseModel$keep), + AIC = unlist(stepwiseModel$keep[2, ]), + formula = sapply(stepwiseModel$keep[3, ], function(x) + # apply some gsub magic to turn I(A^2) into A^2 + paste(gsub("I\\((.*)\\^2\\)", "\\1^2", names(x)[-1]), collapse = " + ")) + ) + + result[["stepwiseResult"]] <- list(excludedVars = gsub("I\\((.*)\\^2\\)", "\\1^2", excludedVars), + stepwiseResult = stepwiseResult) + } + regressionFit <- lm(formula, data = dataset) regressionFitCoded <- lm(formula, data = datasetCoded) regressionSummary <- summary(regressionFit) @@ -497,40 +535,26 @@ doeAnalysis <- function(jaspResults, dataset, options, ...) { resultCoded[["regression"]][["coefficients"]][["p"]] <- rep(NA, length(valid_coefsCoded)) } - ## Model formula - ## uncoded - coefs <- coef(regressionFit)[!is.na(coef(regressionFit))] - coefs <- round(coefs, .numDecimals) + ## Model formula for predictors + coefs <- if (options[["codeFactors"]]) coefsCoded else coefs coefNames <- if (options[["tableAlias"]]) termNamesAliased else termNames - plusOrMin <- sapply(seq_len(length(coefs)), function(x) { - if (coefs[x] > 0) "+" else "-" - }) - filledFormula <- sprintf("%s = %s %s %s %s %s", currentDependent, coefs[1], coefNames[1], plusOrMin[2], abs(coefs[2]), coefNames[2]) - if (length(coefs) > 2) { - for (i in 3:length(coefs)) { - filledFormula <- sprintf("%s %s %s %s", filledFormula, plusOrMin[i], abs(coefs[i]), coefNames[i]) - } - } - #coded - coefsCoded <- coef(regressionFitCoded)[!is.na(coef(regressionFitCoded))] - coefsCoded <- round(coefsCoded, .numDecimals) - coefNames <- if (options[["tableAlias"]]) termNamesAliasedCoded else termNames - plusOrMin <- sapply(seq_len(length(coefsCoded)), function(x) { - if (coefsCoded[x] > 0) "+" else "-" - }) - filledFormulaCoded <- sprintf("%s = %s %s %s %s %s", currentDependent, coefsCoded[1], coefNames[1], plusOrMin[2], abs(coefsCoded[2]), coefNames[2]) - if (length(coefsCoded) > 2) { - for (i in 3:length(coefsCoded)) { - filledFormulaCoded <- sprintf("%s %s %s %s", filledFormulaCoded, plusOrMin[i], abs(coefsCoded[i]), coefNames[i]) - } - } + coefFormula <- paste(ifelse(sign(coefs[-1,1])==1, " +", " -"), + round(abs(coefs[-1,1]), .numDecimals), + coefNames[-1], + collapse = "", sep = " ") + + # Now add dependent name and intercept + filledFormula <- paste0(dep, " = ", + round(coefs[1, 1], .numDecimals), + coefFormula) + result[["regression"]][["filledFormula"]] <- gsubInteractionSymbol(filledFormula) jaspResults[[currentDependent]][["doeResult"]] <- createJaspState(result) jaspResults[[currentDependent]][["doeResult"]]$dependOn(options = .doeAnalysisBaseDependencies()) - resultCoded[["regression"]][["filledFormula"]] <- gsubInteractionSymbol(filledFormulaCoded) + resultCoded[["regression"]][["filledFormula"]] <- gsubInteractionSymbol(filledFormula) jaspResults[[currentDependent]][["doeResultCoded"]] <- createJaspState(resultCoded) jaspResults[[currentDependent]][["doeResultCoded"]]$dependOn(options = .doeAnalysisBaseDependencies()) } @@ -1157,9 +1181,9 @@ get_levels <- function(var, num_levels, dataset) { # calculate model row modelSS <- sum(anovaFit$`Sum Sq`[-nrow(anovaFit)]) modelDf <- sum(anovaFit$Df[-nrow(anovaFit)]) - modelMS <- modelSS / modelDf + modelMS <- if (modelDf != 0) modelSS / modelDf else NA msError <- anovaFit$`Mean Sq`[nrow(anovaFit)] - modelFValue <- if (msError != 0) modelMS / msError else NA + modelFValue <- if (msError != 0 && modelDf != 0) modelMS / msError else NA modelPValue <- if (!is.na(modelFValue)) pf(modelFValue, modelDf, anovaFit$Df[nrow(anovaFit)], lower.tail = FALSE) else NA modelRow <- data.frame(df = modelDf, ss = modelSS, ms = modelMS, f = modelFValue, p = modelPValue) colnames(modelRow) <- colnames(anovaFit) @@ -1326,6 +1350,7 @@ get_levels <- function(var, num_levels, dataset) { if (!is.null(jaspResults[["tableAnova"]])) { return() } + tb <- createJaspTable(gettext("ANOVA")) tb$addColumnInfo(name = "terms", title = gettext("Source"), type = "string") tb$addColumnInfo(name = "adjss", title = gettext("Sum of squares"), type = "number") @@ -1396,15 +1421,16 @@ get_levels <- function(var, num_levels, dataset) { tb$dependOn(options = c("codeFactors", "codeFactorsManualTable", "codeFactorsMethod", "tableEquation", "tableAlias", .doeAnalysisBaseDependencies())) tb2$position <- 4 jaspResults[[dep]][["tableCoefficientsLegend"]] <- tb2 - result2 <- jaspResults[[dep]][["doeResult"]]$object[["regression"]][["factorLevels"]] factorName <- if (options[["tableAlias"]]) result2[["factorNamesAliased"]] else result2[["factorNames"]] - rows2 <- data.frame( - factorName = factorName, - factorLevel = result2[["levels"]] - ) + if (length(factorName) > 0) { + rows2 <- data.frame( + factorName = factorName, + factorLevel = result2[["levels"]] + ) - tb2$addRows(rows2) + tb2$addRows(rows2) + } } } } @@ -1429,6 +1455,44 @@ get_levels <- function(var, num_levels, dataset) { } } +.doeAnalysisStepwiseTable <- function(jaspResults, dependent, options, ready, coded) { + for (dep in dependent) { + if (options[["stepwiseMethod"]] == "enter" || !is.null(jaspResults[[dep]][["result"]])) { + return() + } + + tb <- createJaspTable(gettext("Stepwise results")) + + tb$addColumnInfo(name = "stepNr", title = gettext("Step"), type = "string", combine = TRUE) + tb$addColumnInfo(name = "AIC", title = "AIC", type = "number") + tb$addColumnInfo(name = "formula", title = gettext("Formula"), type = "string") + + tb$dependOn(options = .doeAnalysisBaseDependencies()) + tb$position <- 4 + + jaspResults[[dep]][["stepwiseTable"]] <- tb + + if (!ready || is.null(jaspResults[[dep]][["doeResult"]]) || jaspResults[[dep]]$getError()) { + return() + } + + result <- jaspResults[[dep]][["doeResult"]]$object[["stepwiseResult"]] + + tb$setData(result[["stepwiseResult"]]) + + excludedVars <- result[["excludedVars"]] + if (length(excludedVars) > 0) { + message <- sprintf(ngettext(length(excludedVars), + "The following covariate was considered but not included: %s.", + "The following covariates were considered but not included: %s."), + paste(excludedVars, collapse=", ")) + tb$addFootnote(message) + } + + } +} + + .doeAnalysisPlotPareto <- function(jaspResults, dependent, options, blocks, covariates, ready) { for (dep in dependent) { if (!is.null(jaspResults[[dep]][["plotPareto"]]) || !options[["plotPareto"]]) { diff --git a/inst/qml/doeAnalysis.qml b/inst/qml/doeAnalysis.qml index c3b69ce3..73b99263 100644 --- a/inst/qml/doeAnalysis.qml +++ b/inst/qml/doeAnalysis.qml @@ -89,6 +89,19 @@ Form label: qsTr("Responses") height: 50 * preferencesModel.uiScale } + + DropDown + { + name: "stepwiseMethod" + label: qsTr("Method") + info: qsTr("Specify the order in which the predictors are entered into the model. A block of one or more predictors represents one step in the hierarchy. Note that the present release does not allow for more than one block.") + values: [ + { label: qsTr("Enter"), info: qsTr("All predictors are entered into the model simultaneously.") ,value: "enter"}, + { label: qsTr("Backward"), info: qsTr("Starting with the full model, predictors are removed sequentially based on AIC."), value: "backward"}, + { label: qsTr("Forward"), info: qsTr("Starting with the intercept-only model, predictors are entered sequentially based on AIC.") , value: "forward"} + + ] + } AssignedVariablesList { From 07a83d6203fc5f69437277d1794020c20ede8716 Mon Sep 17 00:00:00 2001 From: Johnny Date: Mon, 22 Sep 2025 19:48:13 +0200 Subject: [PATCH 2/3] improve table aesthetics --- R/doeAnalysis.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/doeAnalysis.R b/R/doeAnalysis.R index 8de2bba1..e1191f4b 100644 --- a/R/doeAnalysis.R +++ b/R/doeAnalysis.R @@ -535,9 +535,12 @@ doeAnalysis <- function(jaspResults, dataset, options, ...) { resultCoded[["regression"]][["coefficients"]][["p"]] <- rep(NA, length(valid_coefsCoded)) } - ## Model formula for predictors - coefs <- if (options[["codeFactors"]]) coefsCoded else coefs - coefNames <- if (options[["tableAlias"]]) termNamesAliased else termNames + ## Model formula for predictors, based on coding/aliasing + if (options[["tableAlias"]]) { + coefNames <- if (options[["codeFactors"]]) termNamesAliasedCoded else termNamesAliased + } else { + coefNames <- if (options[["codeFactors"]]) termNamesCoded else termNames + } coefFormula <- paste(ifelse(sign(coefs[-1,1])==1, " +", " -"), round(abs(coefs[-1,1]), .numDecimals), From 8921661d13f07cc9f802889f14692e9f7a401827 Mon Sep 17 00:00:00 2001 From: Johnny Date: Wed, 24 Sep 2025 14:43:38 +0200 Subject: [PATCH 3/3] also for factorial analysis --- R/doeAnalysis.R | 22 ++++++++++++---------- inst/qml/doeAnalysis.qml | 17 ++++++++++++++--- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/R/doeAnalysis.R b/R/doeAnalysis.R index e1191f4b..2b248c8d 100644 --- a/R/doeAnalysis.R +++ b/R/doeAnalysis.R @@ -27,6 +27,7 @@ doeAnalysis <- function(jaspResults, dataset, options, ...) { covariates <- options[["covariates"]] blocks <- options[["blocksFactorial"]] dependent <- options[["dependentFactorial"]] + stepwiseMethod <- options[["stepwiseMethodFactorial"]] } else if (options[["designType"]] == "responseSurfaceDesign") { ready <- length(options[["continuousFactorsResponseSurface"]]) >= 1 && length(options[["dependentResponseSurface"]]) >= 1 discretePredictors <- options[["fixedFactorsResponseSurface"]] @@ -34,6 +35,7 @@ doeAnalysis <- function(jaspResults, dataset, options, ...) { covariates <- NULL blocks <- options[["blocksResponseSurface"]] dependent <- options[["dependentResponseSurface"]] + stepwiseMethod <- options[["stepwiseMethodResponseSurface"]] } if (!ready) { @@ -55,7 +57,7 @@ doeAnalysis <- function(jaspResults, dataset, options, ...) { jaspResults[[dep]] <- createJaspContainer(title = dep) } - p <- try(.doeAnalysisMakeState(jaspResults, dataset, options, continuousPredictors, discretePredictors, blocks, covariates, dependent, ready)) + p <- try(.doeAnalysisMakeState(jaspResults, dataset, options, continuousPredictors, discretePredictors, blocks, covariates, dependent, stepwiseMethod, ready)) if (isTryError(p)) { jaspResults[["errorPlot"]] <- createJaspPlot(title = gettext("Error")) @@ -70,7 +72,7 @@ doeAnalysis <- function(jaspResults, dataset, options, ...) { .doeAnalysisAnovaTable(jaspResults, dependent, options, ready, coded) .doeAnalysisCoefficientsTable(jaspResults, dependent, options, ready, coded) .doeAnalysisEquationTable(jaspResults, dependent, options, ready, coded) - .doeAnalysisStepwiseTable(jaspResults, dependent, options, ready, coded) + .doeAnalysisStepwiseTable(jaspResults, dependent, options, ready, coded, stepwiseMethod) .doeAnalysisPlotPareto(jaspResults, dependent, options, blocks, covariates, ready) .doeAnalysisPlotEffectNormalDistribution(jaspResults, dependent, options, blocks, covariates, ready) .doeAnalysisPlotQQResiduals(jaspResults, dependent, options, ready) @@ -121,13 +123,13 @@ doeAnalysis <- function(jaspResults, dataset, options, ...) { "highestOrder", "order", "continuousFactorsFactorial", "modelTerms", "blocksFactorial", "designType", "continuousFactorsResponseSurface", "codeFactors", "rsmPredefinedModel", "fixedFactorsFactorial", "rsmPredefinedTerms", "dependentFactorial", "covariates", "codeFactorsMethod", "codeFactorsManualTable", - "squaredTerms", "sumOfSquaresType", "squaredTermsCoded", "stepwiseMethod") + "squaredTerms", "sumOfSquaresType", "squaredTermsCoded", "stepwiseMethodResponseSurface", "stepwiseMethodFactorial") return(deps) } .scaleDOEvariable <- function(x){2*(x-min(x))/(max(x)-min(x))-1} -.doeAnalysisMakeState <- function(jaspResults, dataset, options, continuousPredictors, discretePredictors, blocks, covariates, dependent, ready) { +.doeAnalysisMakeState <- function(jaspResults, dataset, options, continuousPredictors, discretePredictors, blocks, covariates, dependent, stepwiseMethod, ready) { if (!ready || jaspResults$getError()) { return() } @@ -256,19 +258,19 @@ doeAnalysis <- function(jaspResults, dataset, options, ...) { formula <- as.formula(formulaString) - if (options[["stepwiseMethod"]] != "enter") { + if (stepwiseMethod != "enter") { stepwiseData <- if (options[["codeFactors"]]) datasetCoded else dataset fullModel <- lm(formula, data = stepwiseData) - if (options[["stepwiseMethod"]] == "forward") { + if (stepwiseMethod == "forward") { initialFormula <- as.formula(paste(dep, "~ 1")) } else { initialFormula <- formula } stepwiseModel <- step(lm(initialFormula, data = stepwiseData), - direction = options[["stepwiseMethod"]], + direction = stepwiseMethod, trace = 0, scope = formula, keep = function(model, aic) @@ -542,7 +544,7 @@ doeAnalysis <- function(jaspResults, dataset, options, ...) { coefNames <- if (options[["codeFactors"]]) termNamesCoded else termNames } - coefFormula <- paste(ifelse(sign(coefs[-1,1])==1, " +", " -"), + coefFormula <- paste(ifelse(sign(coefs[-1,1])==1, " +", " \u2013"), round(abs(coefs[-1,1]), .numDecimals), coefNames[-1], collapse = "", sep = " ") @@ -1458,9 +1460,9 @@ get_levels <- function(var, num_levels, dataset) { } } -.doeAnalysisStepwiseTable <- function(jaspResults, dependent, options, ready, coded) { +.doeAnalysisStepwiseTable <- function(jaspResults, dependent, options, ready, coded, stepwiseMethod) { for (dep in dependent) { - if (options[["stepwiseMethod"]] == "enter" || !is.null(jaspResults[[dep]][["result"]])) { + if (stepwiseMethod == "enter" || !is.null(jaspResults[[dep]][["doeResult"]])) { return() } diff --git a/inst/qml/doeAnalysis.qml b/inst/qml/doeAnalysis.qml index 73b99263..bfb65bb4 100644 --- a/inst/qml/doeAnalysis.qml +++ b/inst/qml/doeAnalysis.qml @@ -35,6 +35,18 @@ Form label: qsTr("Responses") height: 50 * preferencesModel.uiScale } + + DropDown + { + name: "stepwiseMethodFactorial" + label: qsTr("Method") + info: qsTr("Specify the order in which the predictors are entered into the model. A block of one or more predictors represents one step in the hierarchy. Note that the present release does not allow for more than one block.") + values: [ + { label: qsTr("Enter"), info: qsTr("All predictors are entered into the model simultaneously.") ,value: "enter"}, + { label: qsTr("Backward"), info: qsTr("Starting with the full model, predictors are removed sequentially based on AIC."), value: "backward"}, + { label: qsTr("Forward"), info: qsTr("Starting with the intercept-only model, predictors are entered sequentially based on AIC.") , value: "forward"} + ] + } AssignedVariablesList { @@ -92,14 +104,13 @@ Form DropDown { - name: "stepwiseMethod" + name: "stepwiseMethodResponseSurface" label: qsTr("Method") info: qsTr("Specify the order in which the predictors are entered into the model. A block of one or more predictors represents one step in the hierarchy. Note that the present release does not allow for more than one block.") values: [ { label: qsTr("Enter"), info: qsTr("All predictors are entered into the model simultaneously.") ,value: "enter"}, { label: qsTr("Backward"), info: qsTr("Starting with the full model, predictors are removed sequentially based on AIC."), value: "backward"}, - { label: qsTr("Forward"), info: qsTr("Starting with the intercept-only model, predictors are entered sequentially based on AIC.") , value: "forward"} - + { label: qsTr("Forward"), info: qsTr("Starting with the intercept-only model, predictors are entered sequentially based on AIC.") , value: "forward"} ] }