Skip to content

Commit 63369bf

Browse files
author
maechler
committed
quantile() tweak: using *relative* fuzz before rounding, also for type = 1,2,3; Rd: more references
git-svn-id: https://svn.r-project.org/R/trunk@87746 00db46b3-68df-0310-9c12-caf00c1e9a41
1 parent 5a42b48 commit 63369bf

File tree

4 files changed

+56
-25
lines changed

4 files changed

+56
-25
lines changed

doc/NEWS.Rd

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,10 @@
593593
when it tars the <pkg> directory, fixing both \PR{18432} and
594594
\PR{18434}, for \command{vim} and \command{GNU Global}
595595
\command{emacs} users, thanks to \I{Dirk Eddelbuettel}'s patch.
596+
597+
\item \code{quantile()}'s default method gets an option \code{fuzz}
598+
to protect against floating point inaccuracy before rounding, thus
599+
fixing \PR{15811} and, en passant, \PR{17683}.
596600
}
597601
}
598602
}

src/library/stats/R/quantile.R

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# File src/library/stats/R/quantile.R
22
# Part of the R package, https://www.R-project.org
33
#
4-
# Copyright (C) 1995-2022 The R Core Team
4+
# Copyright (C) 1995-2025 The R Core Team
55
#
66
# This program is free software; you can redistribute it and/or modify
77
# it under the terms of the GNU General Public License as published by
@@ -23,8 +23,11 @@ quantile.POSIXt <- function(x, ...)
2323

2424
quantile.default <-
2525
function(x, probs = seq(0, 1, 0.25), na.rm = FALSE, names = TRUE,
26-
type = 7, digits = 7, ...)
26+
type = 7L, digits = 7,
27+
fuzz = if(type == 7L) 0 else 4 * .Machine$double.eps, ...)
2728
{
29+
if(length(type) != 1L || is.na(type) || !is.numeric(type) || !any(type == 1:9))
30+
stop("'type' must be an integer in 1..9")
2831
if(is.factor(x)) {
2932
if(is.ordered(x)) {
3033
if(!any(type == c(1L, 3L)))
@@ -41,16 +44,19 @@ quantile.default <-
4144
x <- x[!is.na(x)]
4245
else if (anyNA(x))
4346
stop("missing values and NaN's not allowed if 'na.rm' is FALSE")
44-
eps <- 100*.Machine$double.eps
47+
eps <- 100*.Machine$double.eps # allow for slight overshoot
4548
if (any((p.ok <- !is.na(probs)) & (probs < -eps | probs > 1+eps)))
4649
stop("'probs' outside [0,1]")
50+
probs <- pmax(0, pmin(1, probs))
51+
stopifnot(is.finite(fuzz), fuzz >= 0, length(fuzz) == 1L)
52+
## need to watch for rounding errors --> use *relative* fuzz
53+
floorF <- function(np) floor(np * (1+fuzz))
4754
n <- length(x)
48-
probs <- pmax(0, pmin(1, probs)) # allow for slight overshoot
4955
np <- length(probs)
5056
{
51-
if(type == 7) { # be completely back-compatible
57+
if(type == 7) { # be completely back-compatible (=> default fuzz := 0)
5258
index <- 1 + max(n - 1, 0) * probs
53-
lo <- floor(index)
59+
lo <- floorF(index)
5460
hi <- ceiling(index)
5561
x <- sort(x, partial = if(n == 0) numeric() else unique(c(lo, hi)[p.ok]))
5662
qs <- x[lo]
@@ -60,28 +66,26 @@ quantile.default <-
6066
## qs[i] <- ifelse(h == 0, qs[i], (1 - h) * qs[i] + h * x[hi[i]])
6167
qs[i] <- (1 - h) * qs[i] + h * x[hi[i]]
6268
} else {
63-
if (type <= 3) {
69+
if (type <= 3L) {
6470
## Types 1, 2 and 3 are discontinuous sample qs.
65-
nppm <- if (type == 3) n * probs - .5 # n * probs + m; m = -0.5
66-
else n * probs # m = 0
67-
j <- floor(nppm)
71+
nppm <- if (type == 3L) n * probs - .5 # n * probs + m; m = -0.5
72+
else n * probs # m = 0
73+
j <- floorF(nppm)
6874
h <- switch(type,
6975
!p.ok | (nppm > j), # type 1
7076
((nppm > j) + 1)/2, # type 2
7177
!p.ok | (nppm != j) | ((j %% 2L) == 1L)) # type 3
7278
} else {
7379
## Types 4 through 9 are continuous sample qs.
74-
switch(type - 3,
80+
switch(type - 3L,
7581
{a <- 0; b <- 1}, # type 4
7682
a <- b <- 0.5, # type 5
7783
a <- b <- 0, # type 6
7884
a <- b <- 1, # type 7 (unused here)
7985
a <- b <- 1 / 3, # type 8
8086
a <- b <- 3 / 8) # type 9
81-
## need to watch for rounding errors here
82-
fuzz <- 4 * .Machine$double.eps
83-
nppm <- a + probs * (n + 1 - a - b) # n*probs + m
84-
j <- floor(nppm + fuzz) # m = a + probs*(1 - a - b)
87+
nppm <- a + probs * (n + 1 - a - b) # n*probs + m; m:= a + probs*(1 - a - b)
88+
j <- floorF(nppm)
8589
h <- nppm - j
8690
if(any(sml <- abs(h) < fuzz, na.rm = TRUE)) h[sml] <- 0
8791
}
@@ -112,7 +116,7 @@ quantile.default <-
112116

113117
##' Formatting() percentages the same way as quantile(*, names=TRUE).
114118
##' Should be exported
115-
##' (and format.pval() moved to stats; both documented on same page)
119+
##' NB format.pval() in 'base', needed by print.summary.table() ...
116120
format_perc <- function(x, digits = max(2L, getOption("digits")),
117121
probability = TRUE, use.fC = length(x) < 100, ...)
118122
{

src/library/stats/man/quantile.Rd

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
% File src/library/stats/man/quantile.Rd
22
% Part of the R package, https://www.R-project.org
3-
% Copyright 1995-2020 R Core Team
3+
% Copyright 1995-2025 R Core Team
44
% Distributed under GPL 2 or later
55

66
\name{quantile}
@@ -17,7 +17,9 @@
1717
quantile(x, \dots)
1818

1919
\method{quantile}{default}(x, probs = seq(0, 1, 0.25), na.rm = FALSE,
20-
names = TRUE, type = 7, digits = 7, \dots)
20+
names = TRUE, type = 7, digits = 7,
21+
fuzz = if(type == 7L) 0 else 4 * .Machine$double.eps,
22+
\dots)
2123
}
2224
\arguments{
2325
\item{x}{numeric vector whose sample quantiles are wanted, or an
@@ -36,6 +38,9 @@ quantile(x, \dots)
3638
\item{digits}{used only when \code{names} is true: the precision to use
3739
when formatting the percentages. In \R versions up to 4.0.x, this had
3840
been set to \code{max(2, getOption("digits"))}, internally.}
41+
\item{fuzz}{small non-negative number to protect against rounding errors
42+
when \code{j <- \link{floor}(np + m)}, (\eqn{np} \dQuote{a version of}
43+
\code{n * probs}, see the formula below), is computed.}
3944
\item{\dots}{further arguments passed to or from other methods.}
4045
}
4146
\details{
@@ -57,6 +62,11 @@ quantile(x, \dots)
5762
There is a method for the date-time classes (see
5863
\code{"\link{POSIXt}"}). Types 1 and 3 can be used for class
5964
\code{"\link{Date}"} and for ordered factors.
65+
66+
\code{fuzz := 4 * .Machine$double.eps} has been hard coded and used for
67+
\code{type = 4,5,6, 8,9} since \code{type}s were introduced, % r30628, 2004-08-12
68+
and is used, since \R 4.5.0, also for the other types (but with default
69+
\code{0} for the default \code{type = 7} for back compatibility reasons).
6070
}
6171
\section{Types}{
6272
\code{quantile} returns estimates of underlying distribution quantiles
@@ -139,14 +149,16 @@ quantile(x, \dots)
139149
}
140150
}
141151
Further details are provided in \bibcite{Hyndman and Fan (1996)} who
142-
recommended type 8.
152+
recommended type 8.
143153
The default method is type 7, as used by S and by \R < 2.0.0.
144-
\I{Makkonen} argues for type 6, also as already proposed by Weibull in 1939.
154+
\bibcite{Makkonen and Pajari (2014)} argue for type 6, also as already
155+
proposed by Weibull in 1939.
145156
The Wikipedia page contains further information about availability of
146157
these 9 types in software.
147158
}
148159
\author{
149-
of the version used in \R >= 2.0.0, Ivan Frohne and Rob J Hyndman.
160+
of the version used in \R >= 2.0.0, Ivan Frohne and Rob J Hyndman;
161+
tweaks, notably use of \code{fuzz}, by the R Core Team.
150162
}
151163
\references{
152164
Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
@@ -157,13 +169,16 @@ quantile(x, \dots)
157169
packages, \emph{American Statistician} \bold{50}, 361--365.
158170
\doi{10.2307/2684934}.
159171
172+
Eric Langford (2006) Quartiles in Elementary Statistics, \emph{Journal
173+
of Statistics Education} \bold{14} 3; \doi{10.1080/10691898.2006.11910589}
174+
175+
Makkonen, L. and Pajari, M. (2014) Defining Sample Quantiles by the True
176+
Rank Probability, \emph{Journal of Probability and Statistics; Hindawi Publ.Corp.}
177+
\doi{10.1155/2014/326579}
178+
160179
Wicklin, R. (2017) Sample quantiles: A comparison of 9 definitions; SAS Blog.
161180
\url{https://blogs.sas.com/content/iml/2017/05/24/definitions-sample-quantiles.html}
162181
163-
%% Makkonen, L. and Pajari, M. (2014) Defining Sample Quantiles by the True
164-
%% Rank Probability. \emph{Journal of Probability and Statistics; Hindawi Publ.Corp.}
165-
%% \doi{10.1155/2014/326579}
166-
%%
167182
Wikipedia: \url{https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample}
168183
}
169184
\seealso{

tests/reg-tests-1e.R

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1846,6 +1846,14 @@ tools::parseLatex("\\Sexpr{ 1 + {1} }")
18461846
assertErrV(tools::parseLatex("\\begin{foo} abc \\end{bar}"))
18471847

18481848

1849+
## quantile.default() needing fuzz PR#15811
1850+
x <- 1:1390
1851+
stopifnot(identical(print(quantile(x, 0.7, type=2)),
1852+
c(`70%` = 973.5)))
1853+
## was 973 in R <= 4.4.x
1854+
1855+
1856+
18491857
## keep at end
18501858
rbind(last = proc.time() - .pt,
18511859
total = proc.time())

0 commit comments

Comments
 (0)