Fix overflows in quantile (#145)

nalimilan · web-flow · commit 35ca0a0fc9de · 2023-07-29T23:32:12.000+02:00
The `a + γ*(b-a)` introduced by JuliaLang/julia#16572 has the advantage that it increases with `γ` even when `a` and `b` are very close, but it has the drawback that it is not robust to overflow. This is likely to happen in practice with small integer and floating point types. Conversely, the `(1-γ)*a + γ*b` which is currently used only for non-finite quantities is robust to overflow but may not always increase with `γ` as when `a` and `b` are very close or (more frequently) equal since precision loss can give a slightly smaller value for a larger `γ`. This can be problematic as it breaks an expected invariant. So keep using the `a + γ*(b-a)` formula when `a ≈ b`, in which case it's almost like returning either `a` or `b` but less arbitrary.
diff --git a/src/Statistics.jl b/src/Statistics.jl
@@ -1020,7 +1020,10 @@ end
         b = v[j + 1]
     end
 
-    if isfinite(a) && isfinite(b)
+    # When a ≉ b, b-a may overflow
+    # When a ≈ b, (1-γ)*a + γ*b may not be increasing with γ due to rounding
+    # Call to float is to work around JuliaLang/julia#50380
+    if isfinite(a) && isfinite(b) && float(a) ≈ float(b)
         return a + γ*(b-a)
     else
         return (1-γ)*a + γ*b
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -753,6 +753,23 @@ let y = [0.40003674665581906, 0.4085630862624367, 0.41662034698690303, 0.4166203
     @test issorted(quantile(y, range(0.01, stop=0.99, length=17)))
 end
 
+@testset "issue #144: no overflow with quantile" begin
+    @test quantile(Float16[-9000, 100], 1.0) ≈ 100
+    @test quantile(Float16[-9000, 100], 0.999999999) ≈ 99.99999
+    @test quantile(Float32[-1e9, 100], 1) ≈ 100
+    @test quantile(Float32[-1e9, 100], 0.9999999999) ≈ 99.89999998
+    @test quantile(Float64[-1e20, 100], 1) ≈ 100
+    @test quantile(Float32[-1e15, -1e14, -1e13, -1e12, -1e11, -1e10, -1e9, 100], 1) ≈ 100
+    @test quantile(Int8[-68, 60], 0.5) ≈ -4
+    @test quantile(Int32[-1e9, 2e9], 1.0) ≈ 2.0e9
+    @test quantile(Int64[-5e18, -2e18, 9e18], 1.0) ≈ 9.0e18
+
+    # check that quantiles are increasing with a, b and p even in corner cases
+    @test issorted(quantile([1.0, 1.0, 1.0+eps(), 1.0+eps()], range(0, 1, length=100)))
+    @test issorted(quantile([1.0, 1.0+1eps(), 1.0+2eps(), 1.0+3eps()], range(0, 1, length=100)))
+    @test issorted(quantile([1.0, 1.0+2eps(), 1.0+4eps(), 1.0+6eps()], range(0, 1, length=100)))
+end
+
 @testset "variance of complex arrays (#13309)" begin
     z = rand(ComplexF64, 10)
     @test var(z) ≈ invoke(var, Tuple{Any}, z) ≈ cov(z) ≈ var(z,dims=1)[1] ≈ sum(abs2, z .- mean(z))/9