Skip to content

Commit 542f57e

Browse files
authored
added quantile estimator with parameters alpha and beta (#20)
1 parent 81cc540 commit 542f57e

File tree

2 files changed

+165
-49
lines changed

2 files changed

+165
-49
lines changed

src/Statistics.jl

Lines changed: 69 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -833,27 +833,38 @@ _median(v::AbstractArray, dims) = mapslices(median!, v, dims = dims)
833833

834834
_median(v::AbstractArray{T}, ::Colon) where {T} = median!(copyto!(Array{T,1}(undef, length(v)), v))
835835

836-
# for now, use the R/S definition of quantile; may want variants later
837-
# see ?quantile in R -- this is type 7
838836
"""
839-
quantile!([q::AbstractArray, ] v::AbstractVector, p; sorted=false)
837+
quantile!([q::AbstractArray, ] v::AbstractVector, p; sorted=false, alpha::Real=1.0, beta::Real=alpha)
840838
841839
Compute the quantile(s) of a vector `v` at a specified probability or vector or tuple of
842840
probabilities `p` on the interval [0,1]. If `p` is a vector, an optional
843841
output array `q` may also be specified. (If not provided, a new output array is created.)
844842
The keyword argument `sorted` indicates whether `v` can be assumed to be sorted; if
845843
`false` (the default), then the elements of `v` will be partially sorted in-place.
846844
847-
Quantiles are computed via linear interpolation between the points `((k-1)/(n-1), v[k])`,
848-
for `k = 1:n` where `n = length(v)`. This corresponds to Definition 7 of Hyndman and Fan
849-
(1996), and is the same as the R default.
845+
By default (`alpha = beta = 1`), quantiles are computed via linear interpolation between the points
846+
`((k-1)/(n-1), v[k])`, for `k = 1:n` where `n = length(v)`. This corresponds to Definition 7
847+
of Hyndman and Fan (1996), and is the same as the R and NumPy default.
848+
849+
The keyword arguments `alpha` and `beta` correspond to the same parameters in Hyndman and Fan,
850+
setting them to different values allows to calculate quantiles with any of the methods 4-9
851+
defined in this paper:
852+
- Def. 4: `alpha=0`, `beta=1`
853+
- Def. 5: `alpha=0.5`, `beta=0.5`
854+
- Def. 6: `alpha=0`, `beta=0` (Excel `PERCENTILE.EXC`, Python default, Stata `altdef`)
855+
- Def. 7: `alpha=1`, `beta=1` (Julia, R and NumPy default, Excel `PERCENTILE` and `PERCENTILE.INC`, Python `'inclusive'`)
856+
- Def. 8: `alpha=1/3`, `beta=1/3`
857+
- Def. 9: `alpha=3/8`, `beta=3/8`
850858
851859
!!! note
852860
An `ArgumentError` is thrown if `v` contains `NaN` or [`missing`](@ref) values.
853861
854-
* Hyndman, R.J and Fan, Y. (1996) "Sample Quantiles in Statistical Packages",
862+
# References
863+
- Hyndman, R.J and Fan, Y. (1996) "Sample Quantiles in Statistical Packages",
855864
*The American Statistician*, Vol. 50, No. 4, pp. 361-365
856865
866+
- [Quantile on Wikipedia](https://en.m.wikipedia.org/wiki/Quantile) details the different quantile definitions
867+
857868
# Examples
858869
```jldoctest
859870
julia> using Statistics
@@ -876,13 +887,13 @@ true
876887
877888
julia> y
878889
3-element Array{Float64,1}:
879-
1.2
890+
1.2000000000000002
880891
2.0
881-
2.8
892+
2.8000000000000003
882893
```
883894
"""
884895
function quantile!(q::AbstractArray, v::AbstractVector, p::AbstractArray;
885-
sorted::Bool=false)
896+
sorted::Bool=false, alpha::Real=1.0, beta::Real=alpha)
886897
require_one_based_indexing(q, v, p)
887898
if size(p) != size(q)
888899
throw(DimensionMismatch("size of p, $(size(p)), must equal size of q, $(size(q))"))
@@ -893,22 +904,22 @@ function quantile!(q::AbstractArray, v::AbstractVector, p::AbstractArray;
893904
_quantilesort!(v, sorted, minp, maxp)
894905

895906
for (i, j) in zip(eachindex(p), eachindex(q))
896-
@inbounds q[j] = _quantile(v,p[i])
907+
@inbounds q[j] = _quantile(v,p[i], alpha=alpha, beta=beta)
897908
end
898909
return q
899910
end
900911

901912
function quantile!(v::AbstractVector, p::Union{AbstractArray, Tuple{Vararg{Real}}};
902-
sorted::Bool=false)
913+
sorted::Bool=false, alpha::Real=1., beta::Real=alpha)
903914
if !isempty(p)
904915
minp, maxp = extrema(p)
905916
_quantilesort!(v, sorted, minp, maxp)
906917
end
907-
return map(x->_quantile(v, x), p)
918+
return map(x->_quantile(v, x, alpha=alpha, beta=beta), p)
908919
end
909920

910-
quantile!(v::AbstractVector, p::Real; sorted::Bool=false) =
911-
_quantile(_quantilesort!(v, sorted, p, p), p)
921+
quantile!(v::AbstractVector, p::Real; sorted::Bool=false, alpha::Real=1., beta::Real=alpha) =
922+
_quantile(_quantilesort!(v, sorted, p, p), p, alpha=alpha, beta=beta)
912923

913924
# Function to perform partial sort of v for quantiles in given range
914925
function _quantilesort!(v::AbstractArray, sorted::Bool, minp::Real, maxp::Real)
@@ -917,8 +928,8 @@ function _quantilesort!(v::AbstractArray, sorted::Bool, minp::Real, maxp::Real)
917928

918929
if !sorted
919930
lv = length(v)
920-
lo = floor(Int,1+minp*(lv-1))
921-
hi = ceil(Int,1+maxp*(lv-1))
931+
lo = floor(Int,minp*(lv))
932+
hi = ceil(Int,1+maxp*(lv))
922933

923934
# only need to perform partial sort
924935
sort!(v, 1, lv, Base.Sort.PartialQuickSort(lo:hi), Base.Sort.Forward)
@@ -929,45 +940,65 @@ function _quantilesort!(v::AbstractArray, sorted::Bool, minp::Real, maxp::Real)
929940
end
930941

931942
# Core quantile lookup function: assumes `v` sorted
932-
@inline function _quantile(v::AbstractVector, p::Real)
943+
@inline function _quantile(v::AbstractVector, p::Real; alpha::Real=1.0, beta::Real=alpha)
933944
0 <= p <= 1 || throw(ArgumentError("input probability out of [0,1] range"))
945+
0 <= alpha <= 1 || throw(ArgumentError("alpha parameter out of [0,1] range"))
946+
0 <= beta <= 1 || throw(ArgumentError("beta parameter out of [0,1] range"))
934947
require_one_based_indexing(v)
935948

936-
lv = length(v)
937-
f0 = (lv - 1)*p # 0-based interpolated index
938-
t0 = trunc(f0)
939-
h = f0 - t0
940-
i = trunc(Int,t0) + 1
949+
n = length(v)
950+
m = alpha + p * (one(alpha) - alpha - beta)
951+
aleph = n*p + oftype(p, m)
952+
j = clamp(trunc(Int, aleph), 1, n-1)
953+
γ = clamp(aleph - j, 0, 1)
954+
955+
a = v[j]
956+
b = v[j + 1]
941957

942-
a = v[i]
943-
b = v[i + (h > 0)]
944958
if isfinite(a) && isfinite(b)
945-
return a + h*(b-a)
959+
return a + γ*(b-a)
946960
else
947-
return (1-h)*a + h*b
961+
return (1-γ)*a + γ*b
948962
end
949963
end
950964

951-
952965
"""
953-
quantile(itr, p; sorted=false)
966+
quantile(itr, p; sorted=false, alpha::Real=1.0, beta::Real=alpha)
954967
955968
Compute the quantile(s) of a collection `itr` at a specified probability or vector or tuple of
956969
probabilities `p` on the interval [0,1]. The keyword argument `sorted` indicates whether
957970
`itr` can be assumed to be sorted.
958971
959-
Quantiles are computed via linear interpolation between the points `((k-1)/(n-1), v[k])`,
960-
for `k = 1:n` where `n = length(itr)`. This corresponds to Definition 7 of Hyndman and Fan
961-
(1996), and is the same as the R default.
972+
Samples quantile are defined by `Q(p) = (1-γ)*x[j] + γ*x[j+1]`,
973+
where ``x[j]`` is the j-th order statistic, and `γ` is a function of
974+
`j = floor(n*p + m)`, `m = alpha + p*(1 - alpha - beta)` and
975+
`g = n*p + m - j`.
976+
977+
By default (`alpha = beta = 1`), quantiles are computed via linear interpolation between the points
978+
`((k-1)/(n-1), v[k])`, for `k = 1:n` where `n = length(itr)`. This corresponds to Definition 7
979+
of Hyndman and Fan (1996), and is the same as the R and NumPy default.
980+
981+
The keyword arguments `alpha` and `beta` correspond to the same parameters in Hyndman and Fan,
982+
setting them to different values allows to calculate quantiles with any of the methods 4-9
983+
defined in this paper:
984+
- Def. 4: `alpha=0`, `beta=1`
985+
- Def. 5: `alpha=0.5`, `beta=0.5`
986+
- Def. 6: `alpha=0`, `beta=0` (Excel `PERCENTILE.EXC`, Python default, Stata `altdef`)
987+
- Def. 7: `alpha=1`, `beta=1` (Julia, R and NumPy default, Excel `PERCENTILE` and `PERCENTILE.INC`, Python `'inclusive'`)
988+
- Def. 8: `alpha=1/3`, `beta=1/3`
989+
- Def. 9: `alpha=3/8`, `beta=3/8`
962990
963991
!!! note
964-
An `ArgumentError` is thrown if `itr` contains `NaN` or [`missing`](@ref) values.
992+
An `ArgumentError` is thrown if `v` contains `NaN` or [`missing`](@ref) values.
965993
Use the [`skipmissing`](@ref) function to omit `missing` entries and compute the
966994
quantiles of non-missing values.
967995
996+
# References
968997
- Hyndman, R.J and Fan, Y. (1996) "Sample Quantiles in Statistical Packages",
969998
*The American Statistician*, Vol. 50, No. 4, pp. 361-365
970999
1000+
- [Quantile on Wikipedia](https://en.m.wikipedia.org/wiki/Quantile) details the different quantile definitions
1001+
9711002
# Examples
9721003
```jldoctest
9731004
julia> using Statistics
@@ -979,16 +1010,17 @@ julia> quantile(0:20, [0.1, 0.5, 0.9])
9791010
3-element Array{Float64,1}:
9801011
2.0
9811012
10.0
982-
18.0
1013+
18.000000000000004
9831014
9841015
julia> quantile(skipmissing([1, 10, missing]), 0.5)
9851016
5.5
9861017
```
9871018
"""
988-
quantile(itr, p; sorted::Bool=false) = quantile!(collect(itr), p, sorted=sorted)
1019+
quantile(itr, p; sorted::Bool=false, alpha::Real=1.0, beta::Real=alpha) =
1020+
quantile!(collect(itr), p, sorted=sorted, alpha=alpha, beta=beta)
9891021

990-
quantile(v::AbstractVector, p; sorted::Bool=false) =
991-
quantile!(sorted ? v : Base.copymutable(v), p; sorted=sorted)
1022+
quantile(v::AbstractVector, p; sorted::Bool=false, alpha::Real=1.0, beta::Real=alpha) =
1023+
quantile!(sorted ? v : Base.copymutable(v), p; sorted=sorted, alpha=alpha, beta=beta)
9921024

9931025

9941026
##### SparseArrays optimizations #####

test/runtests.jl

Lines changed: 96 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -480,29 +480,30 @@ end
480480
end
481481

482482
@testset "quantile" begin
483-
@test quantile([1,2,3,4],0.5) == 2.5
484-
@test quantile([1,2,3,4],[0.5]) == [2.5]
485-
@test quantile([1., 3],[.25,.5,.75])[2] == median([1., 3])
486-
@test quantile(100.0:-1.0:0.0, 0.0:0.1:1.0) == 0.0:10.0:100.0
487-
@test quantile(0.0:100.0, 0.0:0.1:1.0, sorted=true) == 0.0:10.0:100.0
488-
@test quantile(100f0:-1f0:0.0, 0.0:0.1:1.0) == 0f0:10f0:100f0
483+
@test quantile([1,2,3,4],0.5) 2.5
484+
@test quantile([1,2,3,4],[0.5]) [2.5]
485+
@test quantile([1., 3],[.25,.5,.75])[2] median([1., 3])
486+
@test quantile(100.0:-1.0:0.0, 0.0:0.1:1.0) 0.0:10.0:100.0
487+
@test quantile(0.0:100.0, 0.0:0.1:1.0, sorted=true) 0.0:10.0:100.0
488+
@test quantile(100f0:-1f0:0.0, 0.0:0.1:1.0) 0f0:10f0:100f0
489489
@test quantile([Inf,Inf],0.5) == Inf
490490
@test quantile([-Inf,1],0.5) == -Inf
491-
@test quantile([0,1],1e-18) == 1e-18
491+
# here it is required to introduce an absolute tolerance because the calculated value is 0
492+
@test quantile([0,1],1e-18) 1e-18 atol=1e-18
492493
@test quantile([1, 2, 3, 4],[]) == []
493494
@test quantile([1, 2, 3, 4], (0.5,)) == (2.5,)
494495
@test quantile([4, 9, 1, 5, 7, 8, 2, 3, 5, 17, 11],
495496
(0.1, 0.2, 0.4, 0.9)) == (2.0, 3.0, 5.0, 11.0)
496497
@test quantile(Union{Int, Missing}[4, 9, 1, 5, 7, 8, 2, 3, 5, 17, 11],
497-
[0.1, 0.2, 0.4, 0.9]) == [2.0, 3.0, 5.0, 11.0]
498+
[0.1, 0.2, 0.4, 0.9]) [2.0, 3.0, 5.0, 11.0]
498499
@test quantile(Any[4, 9, 1, 5, 7, 8, 2, 3, 5, 17, 11],
499-
[0.1, 0.2, 0.4, 0.9]) == [2.0, 3.0, 5.0, 11.0]
500+
[0.1, 0.2, 0.4, 0.9]) [2.0, 3.0, 5.0, 11.0]
500501
@test quantile([4, 9, 1, 5, 7, 8, 2, 3, 5, 17, 11],
501-
Any[0.1, 0.2, 0.4, 0.9]) == [2.0, 3.0, 5.0, 11.0]
502+
Any[0.1, 0.2, 0.4, 0.9]) [2.0, 3.0, 5.0, 11.0]
502503
@test quantile([4, 9, 1, 5, 7, 8, 2, 3, 5, 17, 11],
503504
Any[0.1, 0.2, 0.4, 0.9]) isa Vector{Float64}
504505
@test quantile(Any[4, 9, 1, 5, 7, 8, 2, 3, 5, 17, 11],
505-
Any[0.1, 0.2, 0.4, 0.9]) == [2, 3, 5, 11]
506+
Any[0.1, 0.2, 0.4, 0.9]) [2, 3, 5, 11]
506507
@test quantile(Any[4, 9, 1, 5, 7, 8, 2, 3, 5, 17, 11],
507508
Any[0.1, 0.2, 0.4, 0.9]) isa Vector{Float64}
508509
@test quantile([1, 2, 3, 4], ()) == ()
@@ -533,7 +534,90 @@ end
533534
x = [3; 2; 1]
534535
y = zeros(3)
535536
@test quantile!(y, x, [0.1, 0.5, 0.9]) === y
536-
@test y == [1.2, 2.0, 2.8]
537+
@test y [1.2, 2.0, 2.8]
538+
539+
#tests for quantile calculation with configurable alpha and beta parameters
540+
v = [2, 3, 4, 6, 9, 2, 6, 2, 21, 17]
541+
542+
# tests against scipy.stats.mstats.mquantiles method
543+
@test quantile(v, 0.0, alpha=0.0, beta=0.0) 2.0
544+
@test quantile(v, 0.2, alpha=1.0, beta=1.0) 2.0
545+
@test quantile(v, 0.4, alpha=0.0, beta=0.0) 3.4
546+
@test quantile(v, 0.4, alpha=0.0, beta=0.2) 3.32
547+
@test quantile(v, 0.4, alpha=0.0, beta=0.4) 3.24
548+
@test quantile(v, 0.4, alpha=0.0, beta=0.6) 3.16
549+
@test quantile(v, 0.4, alpha=0.0, beta=0.8) 3.08
550+
@test quantile(v, 0.4, alpha=0.0, beta=1.0) 3.0
551+
@test quantile(v, 0.4, alpha=0.2, beta=0.0) 3.52
552+
@test quantile(v, 0.4, alpha=0.2, beta=0.2) 3.44
553+
@test quantile(v, 0.4, alpha=0.2, beta=0.4) 3.36
554+
@test quantile(v, 0.4, alpha=0.2, beta=0.6) 3.28
555+
@test quantile(v, 0.4, alpha=0.2, beta=0.8) 3.2
556+
@test quantile(v, 0.4, alpha=0.2, beta=1.0) 3.12
557+
@test quantile(v, 0.4, alpha=0.4, beta=0.0) 3.64
558+
@test quantile(v, 0.4, alpha=0.4, beta=0.2) 3.56
559+
@test quantile(v, 0.4, alpha=0.4, beta=0.4) 3.48
560+
@test quantile(v, 0.4, alpha=0.4, beta=0.6) 3.4
561+
@test quantile(v, 0.4, alpha=0.4, beta=0.8) 3.32
562+
@test quantile(v, 0.4, alpha=0.4, beta=1.0) 3.24
563+
@test quantile(v, 0.4, alpha=0.6, beta=0.0) 3.76
564+
@test quantile(v, 0.4, alpha=0.6, beta=0.2) 3.68
565+
@test quantile(v, 0.4, alpha=0.6, beta=0.4) 3.6
566+
@test quantile(v, 0.4, alpha=0.6, beta=0.6) 3.52
567+
@test quantile(v, 0.4, alpha=0.6, beta=0.8) 3.44
568+
@test quantile(v, 0.4, alpha=0.6, beta=1.0) 3.36
569+
@test quantile(v, 0.4, alpha=0.8, beta=0.0) 3.88
570+
@test quantile(v, 0.4, alpha=0.8, beta=0.2) 3.8
571+
@test quantile(v, 0.4, alpha=0.8, beta=0.4) 3.72
572+
@test quantile(v, 0.4, alpha=0.8, beta=0.6) 3.64
573+
@test quantile(v, 0.4, alpha=0.8, beta=0.8) 3.56
574+
@test quantile(v, 0.4, alpha=0.8, beta=1.0) 3.48
575+
@test quantile(v, 0.4, alpha=1.0, beta=0.0) 4.0
576+
@test quantile(v, 0.4, alpha=1.0, beta=0.2) 3.92
577+
@test quantile(v, 0.4, alpha=1.0, beta=0.4) 3.84
578+
@test quantile(v, 0.4, alpha=1.0, beta=0.6) 3.76
579+
@test quantile(v, 0.4, alpha=1.0, beta=0.8) 3.68
580+
@test quantile(v, 0.4, alpha=1.0, beta=1.0) 3.6
581+
@test quantile(v, 0.6, alpha=0.0, beta=0.0) 6.0
582+
@test quantile(v, 0.6, alpha=1.0, beta=1.0) 6.0
583+
@test quantile(v, 0.8, alpha=0.0, beta=0.0) 15.4
584+
@test quantile(v, 0.8, alpha=0.0, beta=0.2) 14.12
585+
@test quantile(v, 0.8, alpha=0.0, beta=0.4) 12.84
586+
@test quantile(v, 0.8, alpha=0.0, beta=0.6) 11.56
587+
@test quantile(v, 0.8, alpha=0.0, beta=0.8) 10.28
588+
@test quantile(v, 0.8, alpha=0.0, beta=1.0) 9.0
589+
@test quantile(v, 0.8, alpha=0.2, beta=0.0) 15.72
590+
@test quantile(v, 0.8, alpha=0.2, beta=0.2) 14.44
591+
@test quantile(v, 0.8, alpha=0.2, beta=0.4) 13.16
592+
@test quantile(v, 0.8, alpha=0.2, beta=0.6) 11.88
593+
@test quantile(v, 0.8, alpha=0.2, beta=0.8) 10.6
594+
@test quantile(v, 0.8, alpha=0.2, beta=1.0) 9.32
595+
@test quantile(v, 0.8, alpha=0.4, beta=0.0) 16.04
596+
@test quantile(v, 0.8, alpha=0.4, beta=0.2) 14.76
597+
@test quantile(v, 0.8, alpha=0.4, beta=0.4) 13.48
598+
@test quantile(v, 0.8, alpha=0.4, beta=0.6) 12.2
599+
@test quantile(v, 0.8, alpha=0.4, beta=0.8) 10.92
600+
@test quantile(v, 0.8, alpha=0.4, beta=1.0) 9.64
601+
@test quantile(v, 0.8, alpha=0.6, beta=0.0) 16.36
602+
@test quantile(v, 0.8, alpha=0.6, beta=0.2) 15.08
603+
@test quantile(v, 0.8, alpha=0.6, beta=0.4) 13.8
604+
@test quantile(v, 0.8, alpha=0.6, beta=0.6) 12.52
605+
@test quantile(v, 0.8, alpha=0.6, beta=0.8) 11.24
606+
@test quantile(v, 0.8, alpha=0.6, beta=1.0) 9.96
607+
@test quantile(v, 0.8, alpha=0.8, beta=0.0) 16.68
608+
@test quantile(v, 0.8, alpha=0.8, beta=0.2) 15.4
609+
@test quantile(v, 0.8, alpha=0.8, beta=0.4) 14.12
610+
@test quantile(v, 0.8, alpha=0.8, beta=0.6) 12.84
611+
@test quantile(v, 0.8, alpha=0.8, beta=0.8) 11.56
612+
@test quantile(v, 0.8, alpha=0.8, beta=1.0) 10.28
613+
@test quantile(v, 0.8, alpha=1.0, beta=0.0) 17.0
614+
@test quantile(v, 0.8, alpha=1.0, beta=0.2) 15.72
615+
@test quantile(v, 0.8, alpha=1.0, beta=0.4) 14.44
616+
@test quantile(v, 0.8, alpha=1.0, beta=0.6) 13.16
617+
@test quantile(v, 0.8, alpha=1.0, beta=0.8) 11.88
618+
@test quantile(v, 0.8, alpha=1.0, beta=1.0) 10.6
619+
@test quantile(v, 1.0, alpha=0.0, beta=0.0) 21.0
620+
@test quantile(v, 1.0, alpha=1.0, beta=1.0) 21.0
537621
end
538622

539623
# StatsBase issue 164

0 commit comments

Comments
 (0)