Skip to content

Commit b4b9f26

Browse files
committed
better accuracy for cosc for Float32, Float64 around x = 0
Improve accuracy for `cosc(::Float32)` and `cosc(::Float64)`. This is accomplished by: * Using minimax instead of Taylor polynomials. They're more efficient. The minimax polynomials are found using Sollya. * Using multiple polynomials for different subintervals of the domain. Known as "domain splitting". Enables good accuracy for a wider region around the origin than possible with only a single polynomial. The polynomial degrees are kept as-is. Fixes #59065
1 parent 3be3384 commit b4b9f26

File tree

1 file changed

+128
-2
lines changed

1 file changed

+128
-2
lines changed

base/special/trig.jl

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,12 +1130,138 @@ function _cosc(x::Number)
11301130
return isinf_real(x) ? zero(x) : _cosc_generic(x)
11311131
end
11321132
end
1133+
1134+
#=
1135+
1136+
## `cosc(x)` for `x` around the first zero, at `x = 0`
1137+
1138+
`Float32`:
1139+
1140+
```sollya
1141+
prec = 500!;
1142+
accurate = ((pi * x) * cos(pi * x) - sin(pi * x)) / (pi * x * x);
1143+
b1 = 0.27001953125;
1144+
b2 = 0.449951171875;
1145+
domain_0 = [-b1/2, b1];
1146+
domain_1 = [b1, b2];
1147+
machinePrecision = 24;
1148+
freeMonomials = [|1, 3, 5, 7|];
1149+
freeMonomialPrecisions = [|machinePrecision, machinePrecision, machinePrecision, machinePrecision|];
1150+
polynomial_0 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_0);
1151+
polynomial_1 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_1);
1152+
polynomial_0;
1153+
polynomial_1;
1154+
```
1155+
1156+
`Float64`:
1157+
1158+
```sollya
1159+
prec = 500!;
1160+
accurate = ((pi * x) * cos(pi * x) - sin(pi * x)) / (pi * x * x);
1161+
b1 = 0.1700439453125;
1162+
b2 = 0.27001953125;
1163+
b3 = 0.340087890625;
1164+
b4 = 0.39990234375;
1165+
domain_0 = [-b1/2, b1];
1166+
domain_1 = [b1, b2];
1167+
domain_2 = [b2, b3];
1168+
domain_3 = [b3, b4];
1169+
machinePrecision = 53;
1170+
freeMonomials = [|1, 3, 5, 7, 9, 11|];
1171+
freeMonomialPrecisions = [|machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision|];
1172+
polynomial_0 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_0);
1173+
polynomial_1 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_1);
1174+
polynomial_2 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_2);
1175+
polynomial_3 = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, domain_3);
1176+
polynomial_0;
1177+
polynomial_1;
1178+
polynomial_2;
1179+
polynomial_3;
1180+
```
1181+
1182+
=#
1183+
1184+
struct _CosCardinalEvaluationScheme{
1185+
T <: AbstractFloat,
1186+
PolynomialsCloseToZero <: (Tuple{Tuple{T, P}, Vararg{Tuple{T, P}}} where {P <: Tuple{T, Vararg{T}}}),
1187+
}
1188+
polynomials_close_to_origin::PolynomialsCloseToZero
1189+
end
1190+
1191+
function (sch::_CosCardinalEvaluationScheme)(x::AbstractFloat)
1192+
a = abs(x)
1193+
polynomials_close_to_origin = sch.polynomials_close_to_origin
1194+
if (polynomials_close_to_origin !== ()) && (a polynomials_close_to_origin[end][1])
1195+
let
1196+
if length(polynomials_close_to_origin) == 1 # hardcode for each allowed tuple size
1197+
p = only(polynomials_close_to_origin)[2]
1198+
elseif length(polynomials_close_to_origin) == 2
1199+
p = let ((b1, p0), (_, p1)) = polynomials_close_to_origin
1200+
if a b1 # hardcoded binary search
1201+
p0
1202+
else
1203+
p1
1204+
end
1205+
end
1206+
elseif length(polynomials_close_to_origin) == 4
1207+
p = let ((b1, p0), (b2, p1), (b3, p2), (_, p3)) = polynomials_close_to_origin
1208+
if a b2 # hardcoded binary search
1209+
if a b1
1210+
p0
1211+
else
1212+
p1
1213+
end
1214+
else
1215+
if a b3
1216+
p2
1217+
else
1218+
p3
1219+
end
1220+
end
1221+
end
1222+
end
1223+
x * evalpoly(x * x, p)
1224+
end
1225+
elseif isinf(x)
1226+
typeof(x)(0)
1227+
else
1228+
_cosc_generic(x)
1229+
end
1230+
end
1231+
1232+
const _cosc_f32 = let b = Float32 Float16
1233+
_CosCardinalEvaluationScheme(
1234+
(
1235+
(b(0.27), (-3.289868f0, 3.246966f0, -1.1443111f0, 0.20542027f0)),
1236+
(b(0.45), (-3.2898617f0, 3.2467577f0, -1.1420113f0, 0.1965574f0)),
1237+
),
1238+
)
1239+
end
1240+
1241+
const _cosc_f64 = let b = Float64 Float16
1242+
_CosCardinalEvaluationScheme(
1243+
(
1244+
(b(0.17), (-3.289868133696453, 3.2469697011333203, -1.1445109446992934, 0.20918277797812262, -0.023460519561502552, 0.001772485141534688)),
1245+
(b(0.27), (-3.289868133695205, 3.246969700970421, -1.1445109360543062, 0.20918254132488637, -0.023457115021035743, 0.0017515112964895303)),
1246+
(b(0.34), (-3.289868133634355, 3.246969697075094, -1.1445108347839286, 0.209181201609773, -0.023448079433318045, 0.001726628430505518)),
1247+
(b(0.4), (-3.289868133074254, 3.2469696736659346, -1.1445104406286049, 0.20917785794416457, -0.02343378376047161, 0.0017019796223768677)),
1248+
),
1249+
)
1250+
end
1251+
1252+
function _cosc(x::Float32)
1253+
_cosc_f32(x)
1254+
end
1255+
function _cosc(x::Float64)
1256+
_cosc_f64(x)
1257+
end
1258+
11331259
# hard-code Float64/Float32 Taylor series, with coefficients
11341260
# Float64.([(-1)^n*big(pi)^(2n)/((2n+1)*factorial(2n-1)) for n = 1:6])
1135-
_cosc(x::Union{Float64,ComplexF64}) =
1261+
_cosc(x::ComplexF64) =
11361262
fastabs(x) < 0.14 ? x*evalpoly(x^2, (-3.289868133696453, 3.2469697011334144, -1.1445109447325053, 0.2091827825412384, -0.023460810354558236, 0.001781145516372852)) :
11371263
isinf_real(x) ? zero(x) : _cosc_generic(x)
1138-
_cosc(x::Union{Float32,ComplexF32}) =
1264+
_cosc(x::ComplexF32) =
11391265
fastabs(x) < 0.26f0 ? x*evalpoly(x^2, (-3.289868f0, 3.2469697f0, -1.144511f0, 0.20918278f0)) :
11401266
isinf_real(x) ? zero(x) : _cosc_generic(x)
11411267
_cosc(x::Float16) = Float16(_cosc(Float32(x)))

0 commit comments

Comments
 (0)