Skip to content

Commit 5f1c77f

Browse files
author
oscarddssmith
committed
sinsum improvements
1 parent df002cc commit 5f1c77f

File tree

3 files changed

+44
-35
lines changed

3 files changed

+44
-35
lines changed

src/besselj.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ function _besselj0(x::Float64)
6363

6464
a = SQ2OPI(T) * sqrt(xinv) * p
6565
xn = xinv*q
66-
b = sin_sum((x,), (pi/4, xn))
66+
b = sin_sum(x, pi/4, xn)
6767
return a * b
6868
end
6969
end
@@ -131,7 +131,7 @@ function _besselj1(x::Float64)
131131

132132
a = SQ2OPI(T) * sqrt(xinv) * p
133133
xn = xinv*q
134-
b = sin_sum((x,), (-pi/4, xn))
134+
b = sin_sum(x, -pi/4, xn)
135135
return a * b * s
136136
end
137137
end

src/bessely.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ function _bessely0_compute(x::Float64)
8585

8686
a = SQ2OPI(T) * sqrt(xinv) * p
8787
xn = xinv*q
88-
b = sin_sum((x,), (-pi/4, xn))
88+
b = sin_sum(x, -pi/4, xn)
8989
return a * b
9090
end
9191
end
@@ -164,7 +164,7 @@ function _bessely1_compute(x::Float64)
164164

165165
a = SQ2OPI(T) * sqrt(xinv) * p
166166
xn = xinv*q
167-
b = sin_sum((-x,), (-pi/4, -xn))
167+
b = sin_sum(-x, -pi/4, -xn)
168168
return a * b
169169
end
170170
end

src/misc.jl

Lines changed: 40 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
1-
using Base.Math: sin_kernel, cos_kernel, rem_pio2_kernel, DoubleFloat64, DoubleFloat32
2-
function sin_double(n,y)
3-
n = n&3
1+
using Base.Math: sin_kernel, cos_kernel, sincos_kernel, rem_pio2_kernel, DoubleFloat64, DoubleFloat32
2+
3+
"""
4+
computes sin(sum(xs)) where xs are sorted by absolute value
5+
Doing this is much more accurate than the naive sin(sum(xs))
6+
"""
7+
function sin_sum(xs::Vararg{T})::T where T<:Base.IEEEFloat
8+
n, y = rem_pio2_sum(xs...)
9+
n &= 3
410
if n == 0
511
return sin_kernel(y)
612
elseif n == 1
@@ -13,21 +19,42 @@ function sin_double(n,y)
1319
end
1420

1521
"""
16-
computes sin(sum(xs)+sum(xlos)) where xs, xlos are sorted by absolute value
17-
and xlos are all <=pi/4 (and therefore don't need to be reduced)
18-
Doing this is much more accurate than the naive sin(sum(xs)+sum(xlos))
22+
computes sincos(sum(xs)) where xs are sorted by absolute value
23+
Doing this is much more accurate than the naive sincos(sum(xs))
1924
"""
20-
function sin_sum(xs::Tuple{Vararg{Float64}}, xlos::Tuple{Vararg{Float64}})
25+
function sincos_sum(xs::Vararg{T})::T where T<:Base.IEEEFloat
26+
n, y = rem_pio2_sum(xs...)
27+
n &= 3
28+
si, co = sincos_kernel(y)
29+
if n == 0
30+
return si, co
31+
elseif n == 1
32+
return co, -si
33+
elseif n == 2
34+
return -si, -co
35+
else
36+
return -co, si
37+
end
38+
end
39+
40+
function rem_pio2_sum(xs::Vararg{Float64})
2141
n = 0
2242
hi, lo = 0.0, 0.0
23-
for x in xs
43+
small_start = length(xs)+1
44+
for i in eachindex(xs)
45+
x = xs[i]
46+
if abs(x) <= pi/4
47+
small_start = i
48+
break
49+
end
2450
n_i, y = rem_pio2_kernel(x)
2551
n += n_i
2652
s = y.hi + hi
2753
lo += (y.hi - (s - hi)) + y.lo
2854
hi = s
2955
end
30-
for x in xlos
56+
for i in small_start:length(xs)
57+
x = xs[i]
3158
s = x + hi
3259
lo += (x - (s - hi))
3360
hi = s
@@ -42,28 +69,10 @@ function sin_sum(xs::Tuple{Vararg{Float64}}, xlos::Tuple{Vararg{Float64}})
4269
lo += 6.123233995736766e-17
4370
n -= 1
4471
end
45-
sin_double(n,DoubleFloat64(hi, lo))
72+
return n, DoubleFloat64(hi, lo)
4673
end
4774

48-
function sin_sum(xs::Tuple{Vararg{Float32}}, xlos::Tuple{Vararg{Float32}})
49-
n = 0
50-
y = 0.0
51-
for x in xs
52-
n_i, y_i = rem_pio2_kernel(x)
53-
n += n_i
54-
y += y_i.hi
55-
end
56-
for x in xlos
57-
y += x
58-
end
59-
while y > pi/4
60-
y -= pi/2
61-
n += 1
62-
end
63-
while y < -pi/4
64-
y += pi/2
65-
n -= 1
66-
end
67-
68-
sin_double(n,DoubleFloat32(y))
75+
function rem_pio2_sum(xs::Vararg{Union{Float32,Float64}})
76+
n, y = rem_pio2_kernel(sum(Float64, xs))
77+
return n, DoubleFloat32(y.hi)
6978
end

0 commit comments

Comments
 (0)