Skip to content

Commit daf86ce

Browse files
author
oscarddssmith
committed
improve Float32 rem_pio2_sum
1 parent 5f1c77f commit daf86ce

File tree

1 file changed

+31
-17
lines changed

1 file changed

+31
-17
lines changed

src/misc.jl

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,23 +40,16 @@ end
4040
function rem_pio2_sum(xs::Vararg{Float64})
4141
n = 0
4242
hi, lo = 0.0, 0.0
43-
small_start = length(xs)+1
44-
for i in eachindex(xs)
45-
x = xs[i]
43+
for x in xs
4644
if abs(x) <= pi/4
47-
small_start = i
48-
break
45+
s = x + hi
46+
lo += (x - (s - hi))
47+
else
48+
n_i, y = rem_pio2_kernel(x)
49+
n += n_i
50+
s = y.hi + hi
51+
lo += (y.hi - (s - hi)) + y.lo
4952
end
50-
n_i, y = rem_pio2_kernel(x)
51-
n += n_i
52-
s = y.hi + hi
53-
lo += (y.hi - (s - hi)) + y.lo
54-
hi = s
55-
end
56-
for i in small_start:length(xs)
57-
x = xs[i]
58-
s = x + hi
59-
lo += (x - (s - hi))
6053
hi = s
6154
end
6255
while hi > pi/4
@@ -72,7 +65,28 @@ function rem_pio2_sum(xs::Vararg{Float64})
7265
return n, DoubleFloat64(hi, lo)
7366
end
7467

75-
function rem_pio2_sum(xs::Vararg{Union{Float32,Float64}})
76-
n, y = rem_pio2_kernel(sum(Float64, xs))
68+
"""
    rem_pio2_sum(xs::Float32...)

Reduce the sum of `xs` modulo π/2, accumulating the partial sum in `Float64`.

Returns a tuple `(n, y)`: `n` is the sum of the integer counts returned by every
`rem_pio2_kernel` call made along the way, and `y` is a `DoubleFloat32` built from the
`hi` part of the final reduction.
"""
function rem_pio2_sum(xs::Vararg{Float32})
    acc = 0.0
    n = 0
    # The minimum cosine or sine of any Float32 that gets reduced is 1.6e-9
    # so reducing at 2^22 prevents catastrophic loss of precision.
    # There probably is a case where this loses some digits but it is a decent
    # tradeoff between accuracy and speed.
    @fastmath for x in xs
        # Test the magnitude, not the signed value: a large negative term must be
        # reduced too, otherwise it swamps the small terms in the Float64 accumulator.
        if abs(x) > 0x1p22
            n_i, y_i = rem_pio2_kernel(x)
            n += n_i
            acc += y_i.hi
        else
            acc += x
        end
    end
    # One last reduction of the accumulated (now moderately sized) Float64 sum.
    n_last, y = rem_pio2_kernel(acc)
    return n + n_last, DoubleFloat32(y.hi)
end
87+
88+
"""
    rem_pio2_sum(xs::Float16...)

Sum `xs` in `Float64`, reduce that sum with `rem_pio2_kernel`, and return the
kernel's integer count together with a `DoubleFloat32` built from the `hi` part
of the reduced value.
"""
function rem_pio2_sum(xs::Vararg{Float16})
    # Float16 can be losslessly accumulated in Float64
    total = sum(Float64, xs)
    quadrant, reduced = rem_pio2_kernel(total)
    return quadrant, DoubleFloat32(reduced.hi)
end

0 commit comments

Comments
 (0)