Skip to content

Commit 4c723db

Browse files
authored
improve cosc(::Float32) and cosc(::Float64) accuracy (#59087)
Accomplished by: * Using minimax instead of Taylor polynomials. They're more efficient. The minimax polynomials are found using Sollya. * Using multiple polynomials for different subintervals of the domain. Known as "domain splitting". Enables good accuracy for a wider region around the origin than possible with only a single polynomial. The polynomial degrees are kept as-is. Fixes #59065
1 parent 3fa51e9 commit 4c723db

File tree

3 files changed

+225
-5
lines changed

3 files changed

+225
-5
lines changed

base/special/trig.jl

Lines changed: 120 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,6 +1104,10 @@ Return a `T(NaN)` if `isnan(x)`.
11041104
See also [`sinc`](@ref).
11051105
"""
11061106
cosc(x::Number) = _cosc(float(x))
1107+
function _cosc_generic(x)
1108+
pi_x = pi * x
1109+
(pi_x*cospi(x)-sinpi(x))/(pi_x*x)
1110+
end
11071111
function _cosc(x::Number)
11081112
# naive cosc formula is susceptible to catastrophic
11091113
# cancellation error near x=0, so we use the Taylor series
@@ -1124,17 +1128,128 @@ function _cosc(x::Number)
11241128
end
11251129
return π*s
11261130
else
1127-
return isinf_real(x) ? zero(x) : ((pi*x)*cospi(x)-sinpi(x))/((pi*x)*x)
1131+
return isinf_real(x) ? zero(x) : _cosc_generic(x)
1132+
end
1133+
end
1134+
1135+
#=
1136+
1137+
## `cosc(x)` for `x` around the first zero, at `x = 0`
1138+
1139+
`Float32`:
1140+
1141+
```sollya
1142+
prec = 500!;
1143+
accurate = ((pi * x) * cos(pi * x) - sin(pi * x)) / (pi * x * x);
1144+
b1 = 0.27001953125;
1145+
b2 = 0.449951171875;
1146+
domain_0 = [-b1/2, b1];
1147+
domain_1 = [b1, b2];
1148+
machinePrecision = 24;
1149+
freeMonomials = [|1, 3, 5, 7|];
1150+
freeMonomialPrecisions = [|machinePrecision, machinePrecision, machinePrecision, machinePrecision|];
1151+
polynomial_0 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_0);
1152+
polynomial_1 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_1);
1153+
polynomial_0;
1154+
polynomial_1;
1155+
```
1156+
1157+
`Float64`:
1158+
1159+
```sollya
1160+
prec = 500!;
1161+
accurate = ((pi * x) * cos(pi * x) - sin(pi * x)) / (pi * x * x);
1162+
b1 = 0.1700439453125;
1163+
b2 = 0.27001953125;
1164+
b3 = 0.340087890625;
1165+
b4 = 0.39990234375;
1166+
domain_0 = [-b1/2, b1];
1167+
domain_1 = [b1, b2];
1168+
domain_2 = [b2, b3];
1169+
domain_3 = [b3, b4];
1170+
machinePrecision = 53;
1171+
freeMonomials = [|1, 3, 5, 7, 9, 11|];
1172+
freeMonomialPrecisions = [|machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision|];
1173+
polynomial_0 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_0);
1174+
polynomial_1 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_1);
1175+
polynomial_2 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_2);
1176+
polynomial_3 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_3);
1177+
polynomial_0;
1178+
polynomial_1;
1179+
polynomial_2;
1180+
polynomial_3;
1181+
```
1182+
1183+
=#
1184+
1185+
function _cos_cardinal_eval(x::AbstractFloat, polynomials_close_to_origin::NTuple)
1186+
function choose_poly(a::AbstractFloat, polynomials_close_to_origin::NTuple{2})
1187+
((b1, p0), (_, p1)) = polynomials_close_to_origin
1188+
if a b1
1189+
p0
1190+
else
1191+
p1
1192+
end
1193+
end
1194+
function choose_poly(a::AbstractFloat, polynomials_close_to_origin::NTuple{4})
1195+
((b1, p0), (b2, p1), (b3, p2), (_, p3)) = polynomials_close_to_origin
1196+
if a b2 # hardcoded binary search
1197+
if a b1
1198+
p0
1199+
else
1200+
p1
1201+
end
1202+
else
1203+
if a b3
1204+
p2
1205+
else
1206+
p3
1207+
end
1208+
end
1209+
end
1210+
a = abs(x)
1211+
if (polynomials_close_to_origin !== ()) && (a polynomials_close_to_origin[end][1])
1212+
x * evalpoly(x * x, choose_poly(a, polynomials_close_to_origin))
1213+
elseif isinf(x)
1214+
typeof(x)(0)
1215+
else
1216+
_cosc_generic(x)
11281217
end
11291218
end
1219+
1220+
const _cosc_f32 = let b = Float32 Float16
1221+
(
1222+
(b(0.27), (-3.289868f0, 3.246966f0, -1.1443111f0, 0.20542027f0)),
1223+
(b(0.45), (-3.2898617f0, 3.2467577f0, -1.1420113f0, 0.1965574f0)),
1224+
)
1225+
end
1226+
1227+
const _cosc_f64 = let b = Float64 Float16
1228+
(
1229+
(b(0.17), (-3.289868133696453, 3.2469697011333203, -1.1445109446992934, 0.20918277797812262, -0.023460519561502552, 0.001772485141534688)),
1230+
(b(0.27), (-3.289868133695205, 3.246969700970421, -1.1445109360543062, 0.20918254132488637, -0.023457115021035743, 0.0017515112964895303)),
1231+
(b(0.34), (-3.289868133634355, 3.246969697075094, -1.1445108347839286, 0.209181201609773, -0.023448079433318045, 0.001726628430505518)),
1232+
(b(0.4), (-3.289868133074254, 3.2469696736659346, -1.1445104406286049, 0.20917785794416457, -0.02343378376047161, 0.0017019796223768677)),
1233+
)
1234+
end
1235+
1236+
function _cosc(x::Union{Float32, Float64})
1237+
if x isa Float32
1238+
pols = _cosc_f32
1239+
elseif x isa Float64
1240+
pols = _cosc_f64
1241+
end
1242+
_cos_cardinal_eval(x, pols)
1243+
end
1244+
11301245
# hard-code Float64/Float32 Taylor series, with coefficients
11311246
# Float64.([(-1)^n*big(pi)^(2n)/((2n+1)*factorial(2n-1)) for n = 1:6])
1132-
_cosc(x::Union{Float64,ComplexF64}) =
1247+
_cosc(x::ComplexF64) =
11331248
fastabs(x) < 0.14 ? x*evalpoly(x^2, (-3.289868133696453, 3.2469697011334144, -1.1445109447325053, 0.2091827825412384, -0.023460810354558236, 0.001781145516372852)) :
1134-
isinf_real(x) ? zero(x) : ((pi*x)*cospi(x)-sinpi(x))/((pi*x)*x)
1135-
_cosc(x::Union{Float32,ComplexF32}) =
1249+
isinf_real(x) ? zero(x) : _cosc_generic(x)
1250+
_cosc(x::ComplexF32) =
11361251
fastabs(x) < 0.26f0 ? x*evalpoly(x^2, (-3.289868f0, 3.2469697f0, -1.144511f0, 0.20918278f0)) :
1137-
isinf_real(x) ? zero(x) : ((pi*x)*cospi(x)-sinpi(x))/((pi*x)*x)
1252+
isinf_real(x) ? zero(x) : _cosc_generic(x)
11381253
_cosc(x::Float16) = Float16(_cosc(Float32(x)))
11391254
_cosc(x::ComplexF16) = ComplexF16(_cosc(ComplexF32(x)))
11401255

test/math.jl

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
include("testhelpers/EvenIntegers.jl")
44
using .EvenIntegers
55

6+
include("testhelpers/ULPError.jl")
7+
using .ULPError
8+
69
using Random
710
using LinearAlgebra
811
using Base.Experimental: @force_compile
@@ -25,6 +28,55 @@ has_fma = Dict(
2528
BigFloat => true,
2629
)
2730

31+
@testset "meta test: ULPError" begin
32+
examples_f64 = (-3e0, -2e0, -1e0, -1e-1, -1e-10, -0e0, 0e0, 1e-10, 1e-1, 1e0, 2e0, 3e0)::Tuple{Vararg{Float64}}
33+
examples_f16 = Float16.(examples_f64)
34+
examples_f32 = Float32.(examples_f64)
35+
examples = (examples_f16..., examples_f32..., examples_f64...)
36+
@testset "edge cases" begin
37+
@testset "zero error - two equivalent values" begin
38+
@test 0 == @inferred ulp_error(NaN, NaN)
39+
@test 0 == @inferred ulp_error(-NaN, -NaN)
40+
@test 0 == @inferred ulp_error(-NaN, NaN)
41+
@test 0 == @inferred ulp_error(NaN, -NaN)
42+
@test 0 == @inferred ulp_error(Inf, Inf)
43+
@test 0 == @inferred ulp_error(-Inf, -Inf)
44+
@test 0 == @inferred ulp_error(0.0, 0.0)
45+
@test 0 == @inferred ulp_error(-0.0, -0.0)
46+
@test 0 == @inferred ulp_error(-0.0, 0.0)
47+
@test 0 == @inferred ulp_error(0.0, -0.0)
48+
@test 0 == @inferred ulp_error(3.0, 3.0)
49+
@test 0 == @inferred ulp_error(-3.0, -3.0)
50+
end
51+
@testset "infinite error" begin
52+
@test Inf == @inferred ulp_error(NaN, 0.0)
53+
@test Inf == @inferred ulp_error(0.0, NaN)
54+
@test Inf == @inferred ulp_error(NaN, 3.0)
55+
@test Inf == @inferred ulp_error(3.0, NaN)
56+
@test Inf == @inferred ulp_error(NaN, Inf)
57+
@test Inf == @inferred ulp_error(Inf, NaN)
58+
@test Inf == @inferred ulp_error(Inf, -Inf)
59+
@test Inf == @inferred ulp_error(-Inf, Inf)
60+
@test Inf == @inferred ulp_error(0.0, Inf)
61+
@test Inf == @inferred ulp_error(Inf, 0.0)
62+
@test Inf == @inferred ulp_error(3.0, Inf)
63+
@test Inf == @inferred ulp_error(Inf, 3.0)
64+
end
65+
end
66+
@testset "faithful" begin
67+
for x in examples
68+
@test 1 == @inferred ulp_error(x, nextfloat(x, 1))
69+
@test 1 == @inferred ulp_error(x, nextfloat(x, -1))
70+
end
71+
end
72+
@testset "midpoint" begin
73+
for x in examples
74+
a = abs(x)
75+
@test 1 == 2 * @inferred ulp_error(copysign((widen(a) + nextfloat(a, 1))/2, x), x)
76+
end
77+
end
78+
end
79+
2880
@testset "clamp" begin
2981
let
3082
@test clamp(0, 1, 3) == 1
@@ -685,6 +737,12 @@ end
685737
@test cosc(big"0.5") big"-1.273239544735162686151070106980114896275677165923651589981338752471174381073817" rtol=1e-76
686738
@test cosc(big"0.499") big"-1.272045747741181369948389133250213864178198918667041860771078493955590574971317" rtol=1e-76
687739
end
740+
741+
@testset "accuracy of `cosc` around the origin" begin
742+
for t in (Float32, Float64)
743+
@test ulp_error_maximum(cosc, range(start = t(-1), stop = t(1), length = 5000)) < 4
744+
end
745+
end
688746
end
689747

690748
@testset "Irrational args to sinpi/cospi/tanpi/sinc/cosc" begin

test/testhelpers/ULPError.jl

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
module ULPError
4+
export ulp_error, ulp_error_maximum
5+
function ulp_error(accurate::AbstractFloat, approximate::AbstractFloat)
6+
# the ULP error is usually not required to great accuracy, so `Float32` should be precise enough
7+
zero_return = 0f0
8+
inf_return = Inf32
9+
# handle floating-point edge cases
10+
if !(isfinite(accurate) && isfinite(approximate))
11+
accur_is_nan = isnan(accurate)
12+
approx_is_nan = isnan(approximate)
13+
if accur_is_nan || approx_is_nan
14+
return if accur_is_nan === approx_is_nan
15+
zero_return
16+
else
17+
inf_return
18+
end
19+
end
20+
if isinf(approximate)
21+
return if isinf(accurate) && (signbit(accurate) == signbit(approximate))
22+
zero_return
23+
else
24+
inf_return
25+
end
26+
end
27+
end
28+
acc = if accurate isa Union{Float16, Float32}
29+
# widen for better accuracy when doing so does not impact performance too much
30+
widen(accurate)
31+
else
32+
accurate
33+
end
34+
abs(Float32((approximate - acc) / eps(approximate))::Float32)
35+
end
36+
function ulp_error(accurate, approximate, x::AbstractFloat)
37+
acc = accurate(x)
38+
app = approximate(x)
39+
ulp_error(acc, app)
40+
end
41+
function ulp_error(func::Func, x::AbstractFloat) where {Func}
42+
ulp_error(func BigFloat, func, x)
43+
end
44+
function ulp_error_maximum(func::Func, iterator) where {Func}
45+
maximum(Base.Fix1(ulp_error, func), iterator)
46+
end
47+
end

0 commit comments

Comments
 (0)