Skip to content

Commit 5759736

Browse files
Adding quantile types (#437)
Co-authored-by: Don van den Bergh <donvdbergh@hotmail.com>
1 parent c7af6c8 commit 5759736

File tree

3 files changed

+115
-21
lines changed

3 files changed

+115
-21
lines changed

R/descriptives.R

Lines changed: 91 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ DescriptivesInternal <- function(jaspResults, dataset, options) {
128128
if (options$boxPlot) {
129129
if (is.null(jaspResults[["boxPlot"]])) {
130130
jaspResults[["boxPlot"]] <- createJaspContainer(gettext("Boxplots"))
131-
jaspResults[["boxPlot"]]$dependOn(c("boxPlot", "splitBy"))
131+
jaspResults[["boxPlot"]]$dependOn(c("boxPlot", "splitBy", "quantilesType"))
132132
jaspResults[["boxPlot"]]$position <- 7
133133
}
134134

@@ -151,7 +151,7 @@ DescriptivesInternal <- function(jaspResults, dataset, options) {
151151
else # only one Q-Q Plot
152152
gettext("Q-Q Plot")
153153
)
154-
jaspResults[["QQPlots"]]$dependOn(c("qqPlot", "splitBy"))
154+
jaspResults[["QQPlots"]]$dependOn(c("qqPlot", "splitBy", "quantilesType"))
155155
jaspResults[["QQPlots"]]$position <- 8
156156
}
157157
QQPlots <- jaspResults[["QQPlots"]]
@@ -393,7 +393,8 @@ DescriptivesInternal <- function(jaspResults, dataset, options) {
393393
"seMean", "sd", "coefficientOfVariation", "variance", "skewness", "kurtosis", "shapiroWilkTest",
394394
"range", "iqr", "mad", "madRobust", "minimum", "maximum", "sum", "quartiles", "quantilesForEqualGroups",
395395
"percentiles", "descriptivesTableTransposed", "valid", "missing", "meanCi", "meanCiLevel", "meanCiMethod",
396-
"sdCi", "sdCiLevel", "sdCiMethod", "varianceCiMethod", "varianceCi", "varianceCiLevel", "ciBootstrapSamples"
396+
"sdCi", "sdCiLevel", "sdCiMethod", "varianceCiMethod", "varianceCi", "varianceCiLevel", "ciBootstrapSamples",
397+
"quantilesType"
397398
))
398399

399400
if (wantsSplit) {
@@ -611,7 +612,7 @@ DescriptivesInternal <- function(jaspResults, dataset, options) {
611612
resultsCol[["Coefficient of Variation"]]<- .descriptivesDescriptivesTable_subFunction_OptionChecker(options$coefficientOfVariation, na.omitted, function(param) { sd(param) / mean(param)})
612613
resultsCol[["MAD"]] <- .descriptivesDescriptivesTable_subFunction_OptionChecker(options$mad, na.omitted, function(param) { mad(param, constant = 1) } )
613614
resultsCol[["MAD Robust"]] <- .descriptivesDescriptivesTable_subFunction_OptionChecker(options$madRobust, na.omitted, mad)
614-
resultsCol[["IQR"]] <- .descriptivesDescriptivesTable_subFunction_OptionChecker(options$iqr, na.omitted, .descriptivesIqr)
615+
resultsCol[["IQR"]] <- .descriptivesDescriptivesTable_subFunction_OptionChecker(options$iqr, na.omitted, .descriptivesIqr, options)
615616
resultsCol[["Variance"]] <- .descriptivesDescriptivesTable_subFunction_OptionChecker(options$variance, na.omitted, var)
616617
resultsCol[["Kurtosis"]] <- .descriptivesDescriptivesTable_subFunction_OptionChecker(options$kurtosis, na.omitted, .descriptivesKurtosis)
617618
resultsCol[["Std. Error of Kurtosis"]] <- .descriptivesDescriptivesTable_subFunction_OptionChecker(options$kurtosis, na.omitted, .descriptivesSEK)
@@ -705,8 +706,19 @@ DescriptivesInternal <- function(jaspResults, dataset, options) {
705706
if (columnType == "scale" || columnType == "ordinal") {
706707
# Type 7: default in R
707708
# Type 3: Nearest even order statistic (SAS default till ca. 2010).
708-
quartileType <- ifelse(columnType == "scale", 7, 3)
709-
q123 <- quantile(na.omitted, c(.25, .5, .75), names = FALSE, type = quartileType)
709+
quartileType <- as.integer(options[["quantilesType"]]) # extract number from type for quantile() function
710+
711+
# Treat variable (i.e., na.omitted) as numeric to make all quantile types applicable.
712+
# If we treated ordinals as ordered factors, quantile() would raise an error
713+
# for every type other than 1 and 3.
714+
# This is necessary because the user can specify the type from the JASP GUI now,
715+
# and the type is applied across all input variables.
716+
q123 <- quantile(as.numeric(na.omitted), c(.25, .5, .75), names = FALSE, type = quartileType)
717+
718+
# To show the ordinal's labels in the table rather than the integer codes,
719+
# index the factor levels; note that `[` truncates a fractional quantile.
720+
if(columnType == "ordinal")
721+
q123 <- levels(na.omitted)[q123]
710722

711723
resultsCol[["q1"]] <- toMixedCol(q123[1])
712724
resultsCol[["q2"]] <- toMixedCol(q123[2])
@@ -745,19 +757,38 @@ DescriptivesInternal <- function(jaspResults, dataset, options) {
745757
if (columnType == "scale" || columnType == "ordinal") {
746758
# Type 7: default in R
747759
# Type 3: Nearest even order statistic (SAS default till ca. 2010).
748-
quartileType <- ifelse(columnType == "scale", 7, 3)
760+
quartileType <- as.integer(options[["quantilesType"]]) # extract number from type for quantile() function
761+
749762
if (options$quantilesForEqualGroups) {
750763

751-
for (i in seq(equalGroupsNo - 1))
752-
resultsCol[[paste0("eg", i)]] <- toMixedCol(quantile(na.omitted, c(i / equalGroupsNo), names = FALSE, type = quartileType))
764+
for (i in seq(equalGroupsNo - 1)) {
765+
# Treat variable (i.e., na.omitted) as numeric to make all quantile types applicable.
766+
# If we treated ordinals as ordered factors, quantile() would raise an error
767+
# for every type other than 1 and 3.
768+
# This is necessary because the user can specify the type from the JASP GUI now,
769+
# and the type is applied across all input variables.
770+
quantileEst <- quantile(as.numeric(na.omitted), c(i / equalGroupsNo), names = FALSE, type = quartileType)
753771

772+
# To get the labels of the ordinal as output in the table and
773+
# not the numbers, index the levels of the factor
774+
if (columnType == "ordinal")
775+
quantileEst <- levels(na.omitted)[quantileEst]
776+
777+
resultsCol[[paste0("eg", i)]] <- toMixedCol(quantileEst)
778+
}
754779
}
755780

756781
if (options$percentiles) {
757782

758-
for (i in percentilesPercentiles)
759-
resultsCol[[paste0("pc", i)]] <- toMixedCol(quantile(na.omitted, c(i / 100), names = FALSE, type = quartileType))
783+
for (i in percentilesPercentiles) {
784+
# See explanation above
785+
percentileEst <- quantile(as.numeric(na.omitted), c(i / 100), names = FALSE, type = quartileType)
786+
787+
if (columnType == "ordinal")
788+
percentileEst <- levels(na.omitted)[percentileEst]
760789

790+
resultsCol[[paste0("pc", i)]] <- toMixedCol(percentileEst)
791+
}
761792
}
762793
} else {
763794
if (options$quantilesForEqualGroups) {
@@ -786,11 +817,11 @@ DescriptivesInternal <- function(jaspResults, dataset, options) {
786817
}
787818

788819

789-
.descriptivesDescriptivesTable_subFunction_OptionChecker <- function(optionToCheck, na.omitted, function_to_use) {
820+
.descriptivesDescriptivesTable_subFunction_OptionChecker <- function(optionToCheck, na.omitted, function_to_use, ...) {
790821
if (!optionToCheck)
791822
return(NULL)
792823

793-
return(function_to_use(na.omitted))
824+
return(function_to_use(na.omitted, ...))
794825
}
795826

796827
.descriptivesFrequencyTables <- function(dataset, options, freqTabs) {
@@ -1227,7 +1258,7 @@ DescriptivesInternal <- function(jaspResults, dataset, options) {
12271258

12281259
for (level in levels(plotDat$group)) {
12291260
v <- plotDat[plotDat$group == level, ]$y
1230-
quantiles <- quantile(v, probs = c(0.25, 0.75))
1261+
quantiles <- quantile(v, probs = c(0.25, 0.75), type = as.integer(options[["quantilesType"]]))
12311262
obsIQR <- quantiles[2] - quantiles[1]
12321263
plotDat[plotDat$group == level, ]$outlier <- v < (quantiles[1] - 1.5 * obsIQR) | v > (quantiles[2] + 1.5 * obsIQR)
12331264
}
@@ -1253,9 +1284,48 @@ DescriptivesInternal <- function(jaspResults, dataset, options) {
12531284
if (options[["boxPlotBoxPlot"]]) {
12541285
# if we add jittered data points, don't show outlier dots
12551286
outlierShape <- if (options[["boxPlotJitter"]]) NA else 19
1287+
1288+
# create summaries per group for boxplots
1289+
# needed to make the quantiles based on the type specified by the user
1290+
boxData <- dplyr::summarise(
1291+
dplyr::group_by(plotDat, group),
1292+
ymin = min(y[!outlier]), # lower end of whiskers smallest observation that is not an outlier
1293+
lower = quantile(y, 0.25, type = as.integer(options[["quantilesType"]])),
1294+
middle = quantile(y, 0.50, type = as.integer(options[["quantilesType"]])),
1295+
upper = quantile(y, 0.75, type = as.integer(options[["quantilesType"]])),
1296+
ymax = max(y[!outlier]) # upper end of whiskers largest observation that is not an outlier
1297+
)
1298+
1299+
# plot boxplots with errorbars based on the computed statistics
12561300
p <- p +
1257-
ggplot2::stat_boxplot(geom = "errorbar", size = 0.75, width = boxWidth / 2) +
1258-
ggplot2::geom_boxplot(size = 0.75, outlier.size = 2, width = boxWidth, outlier.shape = outlierShape)
1301+
ggplot2::geom_errorbar(data = boxData, ggplot2::aes(
1302+
ymin = ymin,
1303+
ymax = ymax,
1304+
x = group
1305+
),
1306+
inherit.aes = FALSE, linewidth = 0.75, width = boxWidth / 2) +
1307+
1308+
ggplot2::geom_boxplot(data = boxData, ggplot2::aes(
1309+
x = group,
1310+
fill = group,
1311+
ymin = ymin,
1312+
lower = lower,
1313+
middle = middle,
1314+
upper = upper,
1315+
ymax = ymax
1316+
),
1317+
inherit.aes = FALSE,
1318+
size = 0.75, width = boxWidth,
1319+
stat = "identity")
1320+
1321+
# add outliers if there are any
1322+
# since boxplot function cannot add them due to precomputed stats
1323+
if(any(plotDat$outlier)) {
1324+
p <- p +
1325+
ggplot2::geom_point(data = plotDat[plotDat$outlier, ],
1326+
shape = outlierShape,
1327+
size = 2)
1328+
}
12591329
}
12601330

12611331
if (options[["boxPlotJitter"]]) {
@@ -1539,9 +1609,10 @@ DescriptivesInternal <- function(jaspResults, dataset, options) {
15391609
return(kurtosis)
15401610
}
15411611

1542-
.descriptivesIqr <- function(x) {
1543-
# Interquartile range based on the stats package
1544-
return(stats::IQR(x))
1612+
.descriptivesIqr <- function(x, options) {
1613+
# Interquartile range based on the stats package using specified quantile type
1614+
type <- as.integer(options[["quantilesType"]])
1615+
return(stats::IQR(x, type = type))
15451616
}
15461617

15471618
.descriptivesSkewness <- function(x) {
@@ -1782,7 +1853,7 @@ DescriptivesInternal <- function(jaspResults, dataset, options) {
17821853

17831854
# adapted from qqline
17841855
x <- stats::qnorm(c(0.25, 0.75))
1785-
y <- stats::quantile(varCol, probs = c(0.25, 0.75))
1856+
y <- stats::quantile(varCol, probs = c(0.25, 0.75), type = as.integer(options[["quantilesType"]]))
17861857
slope <- diff(y) / diff(x)
17871858
int <- y[1L] - slope * x[1L]
17881859

inst/qml/Descriptives.qml

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ Form
2626
{
2727
info: qsTr("Descriptives allows the user to obtain basic descriptive statistics, histograms and density plots, correlation plots, boxplots, and frequency tables.")
2828
infoBottom: "## " + qsTr("References") + "\n"
29+
+ "- Hyndman, R. J., & Fan, Y. (1996). Sample quantiles in statistical packages. *The American Statistician, 50*(4), 361–365." + "\n"
30+
+ "- Langford, E. (2006). Quartiles in elementary statistics. *Journal of Statistics Education, 14*(3)." + "\n"
2931
+ "- Moore, D. S., McCabe, G. P., & Craig, B. A. (2012). *Introduction to the practice of statistics (7th ed.)*. New York, NY: W. H. Freeman and Company." + "\n"
3032
+ "- Schwarz, G. (1978). Estimating the dimension of a model. *Annals of Statistics, 6*, 461-464." + "\n"
3133
+ "- Whitlock, M. C., & Schluter, D. (2015). *The analysis of biological data (2nd ed.)*. Greenwood Village, Colorado: Roberts and Company Publishers." + "\n"
@@ -46,7 +48,7 @@ Form
4648
{
4749
infoLabel: qsTr("Input")
4850
AvailableVariablesList { name: "allVariablesList" }
49-
AssignedVariablesList { name: "variables"; title: qsTr("Variables"); info: qsTr("All variables of interest."); allowTypeChange: true }
51+
AssignedVariablesList { name: "variables"; id: variables; title: qsTr("Variables"); info: qsTr("All variables of interest."); allowTypeChange: true}
5052
AssignedVariablesList { name: "splitBy"; title: qsTr("Split"); info: qsTr("Can be split by a categorical variable such as experimental condition.") ; singleVariable: true; allowedColumns: ["nominal"]; id: splitBy; minLevels: 2; maxLevels: 256 } // without maxLevels entering a continuous variable can freeze/ crash jasp, so we need an arbitrary maximum
5153
}
5254

@@ -75,6 +77,26 @@ Form
7577
title: qsTr("Quantiles")
7678
info: qsTr("Percentile Values")
7779

80+
DropDown
81+
{
82+
name: "quantilesType"
83+
label: qsTr("Type")
84+
id: quantilesType
85+
indexDefaultValue: 6 // Type 7, the default in R
86+
info: qsTr("Method used to compute quantiles (see Hyndman & Fan, 1996; Langford, 2006, for more information). The selection carries over to the inter-quartile range (IQR), box- and QQ-plot calculations. Note that ordinal variables are treated as continuous for the computations.\n")
87+
values: [
88+
{label: qsTr("1"), value: 1},
89+
{label: qsTr("2 (SAS)"), value: 2},
90+
{label: qsTr("3"), value: 3},
91+
{label: qsTr("4"), value: 4},
92+
{label: qsTr("5"), value: 5},
93+
{label: qsTr("6 (Minitab, SPSS)"), value: 6},
94+
{label: qsTr("7 (R)"), value: 7},
95+
{label: qsTr("8"), value: 8},
96+
{label: qsTr("9"), value: 9}
97+
]
98+
}
99+
78100
CheckBox { name: "quartiles"; label: qsTr("Quartiles"); info: qsTr("Displays the 25th, 50th, and 75th percentiles of the data points.") }
79101
CheckBox
80102
{

tests/testthat/test-descriptives.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,7 @@ test_that("minimum and maximum return correctly show labels (not values) for ord
558558
options$quantilesForEqualGroupsNumber <- 5
559559
options$percentiles <- TRUE
560560
options$percentileValues <- c(2, 5, 8)
561+
options$quantilesType <- 3 # unit tests were written when type = 3 was the default for ordinals
561562

562563

563564
results <- jaspTools::runAnalysis("Descriptives", df, options)

0 commit comments

Comments
 (0)