|
1 | | -function double(::Type{T}, x::BigFloat) where {T<:IEEEFloat} |
2 | | - prec = precision(BigFloat) |
3 | | - setprecision(BigFloat, max(prec, 768)) |
4 | | - hi = T(x) |
5 | | - lo = T(x - hi) |
6 | | - setprecision(BigFloat, prec) |
7 | | - return hi, lo |
8 | | -end |
9 | | - |
10 | | -function double(::Type{T}, x::String) where {T<:IEEEFloat} |
11 | | - prec = precision(BigFloat) |
12 | | - setprecision(BigFloat, 768) |
13 | | - z = parse(BigFloat, x) |
14 | | - hi = T(z) |
15 | | - lo = T(z - hi) |
16 | | - setprecision(BigFloat, prec) |
17 | | - return hi, lo |
18 | | -end |
19 | | - |
20 | | -double64(x::BigFloat) = double(Float64, x) |
21 | | -double32(x::BigFloat) = double(Float32, x) |
22 | | -double16(x::BigFloat) = double(Float16, x) |
23 | | - |
24 | | -double64(x::String) = double(Float64, x) |
25 | | -double32(x::String) = double(Float32, x) |
26 | | -double16(x::String) = double(Float16, x) |
27 | | - |
28 | | -function double_inv(::Type{T}, x::BigFloat) where {T<:IEEEFloat} |
29 | | - prec = precision(BigFloat) |
30 | | - setprecision(BigFloat, max(prec, 768)) |
31 | | - x = inv(x) |
32 | | - hi = T(x) |
33 | | - lo = T(x - hi) |
34 | | - setprecision(BigFloat, prec) |
35 | | - return hi, lo |
36 | | -end |
37 | | - |
38 | | -double64inv(x::BigFloat) = double_inv(Float64, x) |
39 | | -double32inv(x::BigFloat) = double_inv(Float32, x) |
40 | | -double16inv(x::BigFloat) = double_inv(Float16, x) |
41 | | - |
42 | 1 | #= |
43 | 2 | algorithms from |
44 | 3 | Mioara Joldes, Jean-Michel Muller, Valentina Popescu. |
@@ -88,16 +47,6 @@ function TwoSum(a::T, b::T) where {T<:AbstractFloat} |
88 | 47 | return s, t |
89 | 48 | end |
90 | 49 |
|
91 | | -function TwoDiff(a::T, b::T) where {T<:AbstractFloat} |
92 | | - s = a - b |
93 | | - a1 = s + b |
94 | | - b1 = s - a1 |
95 | | - da = a - a1 |
96 | | - db = b - b1 |
97 | | - t = da + db |
98 | | - return s, t |
99 | | -end |
100 | | - |
101 | 50 | # Algorithm 3 in ref: error-free transformation |
102 | 51 |
|
103 | 52 | @inline function Fast2Mult(a::T, b::T) where {T<:AbstractFloat} |
|
115 | 64 | return zₕᵢ, zₗₒ |
116 | 65 | end |
117 | 66 |
|
118 | | -@inline function DWMinusFP(xₕᵢ::T, xₗₒ::T, y::T) where {T<:AbstractFloat} |
119 | | - sₕᵢ, sₗₒ = TwoDiff(xₕᵢ, y) |
120 | | - v = xₗₒ + sₗₒ |
121 | | - zₕᵢ, zₗₒ = TwoSum(sₕᵢ, v) |
122 | | - return zₕᵢ, zₗₒ |
123 | | -end |
124 | | - |
125 | | -# Algorithm 6 in ref: relerr 3u² + 13u³ [reltime 35] |
126 | | - |
127 | | -function AccurateDWPlusDW(xₕᵢ::T, xₗₒ::T, yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} |
128 | | - sₕᵢ, sₗₒ = TwoSum(xₕᵢ, yₕᵢ) |
129 | | - tₕᵢ, tₗₒ = TwoSum(xₗₒ, yₗₒ) |
130 | | - c = sₗₒ + tₕᵢ |
131 | | - vₕᵢ, vₗₒ = Fast2Sum(sₕᵢ, c) |
132 | | - w = tₗₒ + vₗₒ |
133 | | - zₕᵢ, zₗₒ = Fast2Sum(vₕᵢ, w) |
134 | | - return zₕᵢ, zₗₒ |
135 | | -end |
136 | | - |
137 | | -function AccurateDWMinusDW(xₕᵢ::T, xₗₒ::T, yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} |
138 | | - sₕᵢ, sₗₒ = TwoDiff(xₕᵢ, yₕᵢ) |
139 | | - tₕᵢ, tₗₒ = TwoDiff(xₗₒ, yₗₒ) |
140 | | - c = sₗₒ + tₕᵢ |
141 | | - vₕᵢ, vₗₒ = Fast2Sum(sₕᵢ, c) |
142 | | - w = tₗₒ + vₗₒ |
143 | | - zₕᵢ, zₗₒ = Fast2Sum(vₕᵢ, w) |
144 | | - return zₕᵢ, zₗₒ |
145 | | -end |
146 | | - |
147 | | - |
148 | | -# Algorithm 7 in ref: relerr (³/₂)u² + 4u³ [reltime 18] |
149 | | - |
150 | | -@inline function DWTimesFP1(xₕᵢ::T, xₗₒ::T, y::T) where {T<:AbstractFloat} |
151 | | - cₕᵢ, c1 = Fast2Mult(xₕᵢ, y) |
152 | | - c2 = xₗₒ * y |
153 | | - tₕᵢ, t1 = Fast2Sum(cₕᵢ, c2) |
154 | | - t2 = t1 + c1 |
155 | | - zₕᵢ, zₗₒ = Fast2Sum(tₕᵢ, t2) |
156 | | - return zₕᵢ, zₗₒ |
157 | | -end |
158 | | - |
159 | 67 | # Algorithm 9 in ref: relerr 2u² [reltime 15] |
160 | 68 |
|
161 | 69 | @inline function DWTimesFP3(xₕᵢ::T, xₗₒ::T, y::T) where {T<:AbstractFloat} |
|
165 | 73 | return zₕᵢ, zₗₒ |
166 | 74 | end |
167 | 75 |
|
168 | | -# Algorithm 11 in ref: relerr 6u² [reltime 16] |
169 | | - |
170 | | -function DWTimesDW2(xₕᵢ::T, xₗₒ::T, yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} |
171 | | - cₕᵢ, c1 = Fast2Mult(xₕᵢ, yₕᵢ) |
172 | | - t0 = xₕᵢ * yₗₒ |
173 | | - c2 = fma(xₗₒ, yₕᵢ, t0) |
174 | | - c3 = c1 + c2 |
175 | | - zₕᵢ, zₗₒ = Fast2Sum(cₕᵢ, c3) |
176 | | - return zₕᵢ, zₗₒ |
177 | | -end |
178 | | - |
179 | | -# Algorithm 12 in ref: relerr 5u² [reltime 17] |
180 | | - |
181 | | -function DWTimesDW3(xₕᵢ::T, xₗₒ::T, yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} |
182 | | - cₕᵢ, c1 = Fast2Mult(xₕᵢ, yₕᵢ) |
183 | | - t0 = xₗₒ * yₗₒ |
184 | | - t1 = fma(xₕᵢ, yₗₒ, t0) |
185 | | - c2 = fma(xₗₒ, yₕᵢ, t1) |
186 | | - c3 = c1 + c2 |
187 | | - zₕᵢ, zₗₒ = Fast2Sum(cₕᵢ, c3) |
188 | | - return zₕᵢ, zₗₒ |
189 | | -end |
190 | | - |
191 | | -# Algorithm 15 in ref: relerr 3u² [reltime 27] |
192 | | - |
193 | | -function DWDivFP3(xₕᵢ::T, xₗₒ::T, y::T) where {T<:AbstractFloat} |
194 | | - tₕᵢ = xₕᵢ / y |
195 | | - pₕᵢ, pₗₒ = Fast2Mult(tₕᵢ, y) |
196 | | - dₕᵢ = xₕᵢ - pₕᵢ |
197 | | - dₗₒ = dₕᵢ - pₗₒ |
198 | | - d = dₗₒ + xₗₒ |
199 | | - tₗₒ = d / y |
200 | | - zₕᵢ, zₗₒ = Fast2Sum(tₕᵢ, tₗₒ) |
201 | | - return zₕᵢ, zₗₒ |
202 | | -end |
203 | | - |
204 | | -# Algorithm 17 in ref: relerr 15u² + 56u³ [reltime 50] |
205 | | - |
206 | | -function DWDivDW2(xₕᵢ::T, xₗₒ::T, yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} |
207 | | - tₕᵢ = xₕᵢ / yₕᵢ |
208 | | - rₕᵢ, rₗₒ = DWTimesFP1(yₕᵢ, yₗₒ, tₕᵢ) |
209 | | - dₕᵢ = xₕᵢ - rₕᵢ |
210 | | - dₗₒ = xₗₒ - rₗₒ |
211 | | - d = dₕᵢ + dₗₒ |
212 | | - tₗₒ = d / yₕᵢ |
213 | | - zₕᵢ, zₗₒ = Fast2Sum(tₕᵢ, tₗₒ) |
214 | | - return zₕᵢ, zₗₒ |
215 | | -end |
216 | | - |
217 | | -# Algorithm 18 in ref: relerr < 10u² (6u² seen) [reltime 107] |
218 | | -# (note DWTimesDW3 replaces DWTimesDW2 per ref) |
219 | | - |
220 | | -function DWDivDW3(xₕᵢ::T, xₗₒ::T, yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} |
221 | | - tₕᵢ = inv(yₕᵢ) |
222 | | - rₕᵢ = fma(yₕᵢ, -tₕᵢ, one(T)) |
223 | | - rₗₒ = -(yₗₒ * tₕᵢ) |
224 | | - eₕᵢ, eₗₒ = Fast2Sum(rₕᵢ, rₗₒ) |
225 | | - dₕᵢ, dₗₒ = DWTimesFP3(eₕᵢ, eₗₒ, tₕᵢ) |
226 | | - mₕᵢ, mₗₒ = DWPlusFP(dₕᵢ, dₗₒ, tₕᵢ) |
227 | | - zₕᵢ, zₗₒ = DWTimesDW3(xₕᵢ, xₗₒ, mₕᵢ, mₗₒ) |
228 | | - return zₕᵢ, zₗₒ |
229 | | -end |
230 | | - |
231 | 76 | # inv(...) using Algorithms 17 and 18 |
232 | 77 |
|
233 | | -# Algorithm 17 in ref: relerr 15u² + 56u³ [reltime 48] |
234 | | - |
235 | | -function DWInvDW2(yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} |
236 | | - tₕᵢ = one(T) / yₕᵢ |
237 | | - rₕᵢ, rₗₒ = DWTimesFP1(yₕᵢ, yₗₒ, tₕᵢ) |
238 | | - dₕᵢ = one(T) - rₕᵢ |
239 | | - dₗₒ = -rₗₒ |
240 | | - d = dₕᵢ + dₗₒ |
241 | | - tₗₒ = d / yₕᵢ |
242 | | - zₕᵢ, zₗₒ = Fast2Sum(tₕᵢ, tₗₒ) |
243 | | - return zₕᵢ, zₗₒ |
244 | | -end |
245 | | - |
246 | 78 | # Algorithm 18 in ref: relerr < 10u² (6u² seen) [reltime 72] |
247 | | -# (note DWTimesDW3 replaces DWTimesDW2 per ref) |
248 | 79 |
|
249 | 80 | function DWInvDW3(yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} |
250 | 81 | tₕᵢ = inv(yₕᵢ) |
|
0 commit comments