diff --git a/src/doubletriple/double.jl b/src/doubletriple/double.jl index af6ea673..6f3db387 100644 --- a/src/doubletriple/double.jl +++ b/src/doubletriple/double.jl @@ -1,44 +1,3 @@ -function double(::Type{T}, x::BigFloat) where {T<:IEEEFloat} - prec = precision(BigFloat) - setprecision(BigFloat, max(prec, 768)) - hi = T(x) - lo = T(x - hi) - setprecision(BigFloat, prec) - return hi, lo -end - -function double(::Type{T}, x::String) where {T<:IEEEFloat} - prec = precision(BigFloat) - setprecision(BigFloat, 768) - z = parse(BigFloat, x) - hi = T(z) - lo = T(z - hi) - setprecision(BigFloat, prec) - return hi, lo -end - -double64(x::BigFloat) = double(Float64, x) -double32(x::BigFloat) = double(Float32, x) -double16(x::BigFloat) = double(Float16, x) - -double64(x::String) = double(Float64, x) -double32(x::String) = double(Float32, x) -double16(x::String) = double(Float16, x) - -function double_inv(::Type{T}, x::BigFloat) where {T<:IEEEFloat} - prec = precision(BigFloat) - setprecision(BigFloat, max(prec, 768)) - x = inv(x) - hi = T(x) - lo = T(x - hi) - setprecision(BigFloat, prec) - return hi, lo -end - -double64inv(x::BigFloat) = double_inv(Float64, x) -double32inv(x::BigFloat) = double_inv(Float32, x) -double16inv(x::BigFloat) = double_inv(Float16, x) - #= algorithms from Mioara Joldes, Jean-Michel Muller, Valentina Popescu. @@ -88,16 +47,6 @@ function TwoSum(a::T, b::T) where {T<:AbstractFloat} return s, t end -function TwoDiff(a::T, b::T) where {T<:AbstractFloat} - s = a - b - a1 = s + b - b1 = s - a1 - da = a - a1 - db = b - b1 - t = da + db - return s, t -end - # Algorithm 3 in ref: error-free transformation @inline function Fast2Mult(a::T, b::T) where {T<:AbstractFloat} @@ -115,47 +64,6 @@ end return zₕᵢ, zₗₒ end -@inline function DWMinusFP(xₕᵢ::T, xₗₒ::T, y::T) where {T<:AbstractFloat} - sₕᵢ, sₗₒ = TwoDiff(xₕᵢ, y) - v = xₗₒ + sₗₒ - zₕᵢ, zₗₒ = TwoSum(sₕᵢ, v) - return zₕᵢ, zₗₒ -end - -# Algorithm 6 in ref: relerr 3u² + 13u³ [reltime 35] - -function AccurateDWPlusDW(xₕᵢ::T, xₗₒ::T, yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} - sₕᵢ, sₗₒ = TwoSum(xₕᵢ, yₕᵢ) - tₕᵢ, tₗₒ = TwoSum(xₗₒ, yₗₒ) - c = sₗₒ + tₕᵢ - vₕᵢ, vₗₒ = Fast2Sum(sₕᵢ, c) - w = tₗₒ + vₗₒ - zₕᵢ, zₗₒ = Fast2Sum(vₕᵢ, w) - return zₕᵢ, zₗₒ -end - -function AccurateDWMinusDW(xₕᵢ::T, xₗₒ::T, yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} - sₕᵢ, sₗₒ = TwoDiff(xₕᵢ, yₕᵢ) - tₕᵢ, tₗₒ = TwoDiff(xₗₒ, yₗₒ) - c = sₗₒ + tₕᵢ - vₕᵢ, vₗₒ = Fast2Sum(sₕᵢ, c) - w = tₗₒ + vₗₒ - zₕᵢ, zₗₒ = Fast2Sum(vₕᵢ, w) - return zₕᵢ, zₗₒ -end - - -# Algorithm 7 in ref: relerr (³/₂)u² + 4u³ [reltime 18] - -@inline function DWTimesFP1(xₕᵢ::T, xₗₒ::T, y::T) where {T<:AbstractFloat} - cₕᵢ, c1 = Fast2Mult(xₕᵢ, y) - c2 = xₗₒ * y - tₕᵢ, t1 = Fast2Sum(cₕᵢ, c2) - t2 = t1 + c1 - zₕᵢ, zₗₒ = Fast2Sum(tₕᵢ, t2) - return zₕᵢ, zₗₒ -end - # Algorithm 9 in ref: relerr 2u² [reltime 15] @inline function DWTimesFP3(xₕᵢ::T, xₗₒ::T, y::T) where {T<:AbstractFloat} @@ -165,86 +73,9 @@ end return zₕᵢ, zₗₒ end -# Algorithm 11 in ref: relerr 6u² [reltime 16] - -function DWTimesDW2(xₕᵢ::T, xₗₒ::T, yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} - cₕᵢ, c1 = Fast2Mult(xₕᵢ, yₕᵢ) - t0 = xₕᵢ * yₗₒ - c2 = fma(xₗₒ, yₕᵢ, t0) - c3 = c1 + c2 - zₕᵢ, zₗₒ = Fast2Sum(cₕᵢ, c3) - return zₕᵢ, zₗₒ -end - -# Algorithm 12 in ref: relerr 5u² [reltime 17] - -function DWTimesDW3(xₕᵢ::T, xₗₒ::T, yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} - cₕᵢ, c1 = Fast2Mult(xₕᵢ, yₕᵢ) - t0 = xₗₒ * yₗₒ - t1 = fma(xₕᵢ, yₗₒ, t0) - c2 = fma(xₗₒ, yₕᵢ, t1) - c3 = c1 + c2 - zₕᵢ, zₗₒ = Fast2Sum(cₕᵢ, c3) - return zₕᵢ, zₗₒ -end - -# Algorithm 15 in ref: relerr 3u² [reltime 27] - -function DWDivFP3(xₕᵢ::T, xₗₒ::T, y::T) where {T<:AbstractFloat} - tₕᵢ = xₕᵢ / y - pₕᵢ, pₗₒ = Fast2Mult(tₕᵢ, y) - dₕᵢ = xₕᵢ - pₕᵢ - dₗₒ = dₕᵢ - pₗₒ - d = dₗₒ + xₗₒ - tₗₒ = d / y - zₕᵢ, zₗₒ = Fast2Sum(tₕᵢ, tₗₒ) - return zₕᵢ, zₗₒ -end - -# Algorithm 17 in ref: relerr 15u² + 56u³ [reltime 50] - -function DWDivDW2(xₕᵢ::T, xₗₒ::T, yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} - tₕᵢ = xₕᵢ / yₕᵢ - rₕᵢ, rₗₒ = DWTimesFP1(yₕᵢ, yₗₒ, tₕᵢ) - dₕᵢ = xₕᵢ - rₕᵢ - dₗₒ = xₗₒ - rₗₒ - d = dₕᵢ + dₗₒ - tₗₒ = d / yₕᵢ - zₕᵢ, zₗₒ = Fast2Sum(tₕᵢ, tₗₒ) - return zₕᵢ, zₗₒ -end - -# Algorithm 18 in ref: relerr < 10u² (6u² seen) [reltime 107] -# (note DWTimesDW3 replaces DWTimesDW2 per ref) - -function DWDivDW3(xₕᵢ::T, xₗₒ::T, yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} - tₕᵢ = inv(yₕᵢ) - rₕᵢ = fma(yₕᵢ, -tₕᵢ, one(T)) - rₗₒ = -(yₗₒ * tₕᵢ) - eₕᵢ, eₗₒ = Fast2Sum(rₕᵢ, rₗₒ) - dₕᵢ, dₗₒ = DWTimesFP3(eₕᵢ, eₗₒ, tₕᵢ) - mₕᵢ, mₗₒ = DWPlusFP(dₕᵢ, dₗₒ, tₕᵢ) - zₕᵢ, zₗₒ = DWTimesDW3(xₕᵢ, xₗₒ, mₕᵢ, mₗₒ) - return zₕᵢ, zₗₒ -end - # inv(...) using Algorithms 17 and 18 -# Algorithm 17 in ref: relerr 15u² + 56u³ [reltime 48] - -function DWInvDW2(yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} - tₕᵢ = one(T) / yₕᵢ - rₕᵢ, rₗₒ = DWTimesFP1(yₕᵢ, yₗₒ, tₕᵢ) - dₕᵢ = one(T) - rₕᵢ - dₗₒ = -rₗₒ - d = dₕᵢ + dₗₒ - tₗₒ = d / yₕᵢ - zₕᵢ, zₗₒ = Fast2Sum(tₕᵢ, tₗₒ) - return zₕᵢ, zₗₒ -end - # Algorithm 18 in ref: relerr < 10u² (6u² seen) [reltime 72] -# (note DWTimesDW3 replaces DWTimesDW2 per ref) function DWInvDW3(yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat} tₕᵢ = inv(yₕᵢ) diff --git a/src/doubletriple/double_consts.jl b/src/doubletriple/double_consts.jl index 21c13341..373a4c0b 100644 --- a/src/doubletriple/double_consts.jl +++ b/src/doubletriple/double_consts.jl @@ -1,41 +1,8 @@ -const one_d64 = (1.0, 0.0) - const pi_4o1_d64 = (12.56637061435917200, 4.8985871965894130e-16) const pi_2o1_d64 = ( 6.28318530717958600, 2.4492935982947064e-16) const pi_1o1_d64 = ( 3.14159265358979300, 1.2246467991473532e-16) const pi_1o2_d64 = ( 1.57079632679489660, 6.1232339957367660e-17) -const pi_15o32_d64 = ( 1.47262155637021560, -5.3616983752483473e-17) -const pi_7o16_d64 = ( 1.37444678594553450, 5.3578297462696700e-17) -const pi_1o3_d64 = ( 1.04719755119659790, -1.0720817664510910e-16) const pi_1o4_d64 = ( 0.78539816339744830, 3.0616169978683830e-17) -const pi_1o6_d64 = ( 0.52359877559829890, -5.3604088322554550e-17) -const pi_1o8_d64 = ( 0.39269908169872414, 1.5308084989341915e-17) -const pi_1o12_d64 = ( 0.26179938779914946, -2.6802044161277275e-17) -const pi_1o16_d64 = ( 0.19634954084936207, 7.6540424946709580e-18) -const pi_1o24_d64 = ( 0.13089969389957473, -1.3401022080638637e-17) -const pi_1o32_d64 = ( 0.09817477042468103, 3.8270212473354790e-18) -const pi_1o64_d64 = ( 0.04908738521234052, 1.9135106236677394e-18) -const pi_1o128_d64 = ( 0.02454369260617026, 9.5675531183386970e-19) -const pi_1o256_d64 = ( 0.01227184630308513, 4.7837765591693480e-19) - - -const inv_pi_4o1_d64 = ( 0.07957747154594767, -4.9196691687956215e-18) -const inv_pi_2o1_d64 = ( 0.15915494309189535, -9.8393383375912430e-18) -const inv_pi_1o1_d64 = ( 0.31830988618379070, -1.9678676675182486e-17) -const inv_pi_1o2_d64 = ( 0.63661977236758140, -3.9357353350364970e-17) -const inv_pi_15o32_d64 = ( 0.67906109052542010, 2.4632204570453420e-17) -const inv_pi_7o16_d64 = ( 0.72756545413437870, 2.6011543692324530e-18) -const inv_pi_1o3_d64 = ( 0.95492965855137200, -3.5248787942896340e-18) -const inv_pi_1o4_d64 = ( 1.27323954473516280, -7.8714706700729940e-17) -const inv_pi_1o6_d64 = ( 1.90985931710274400, -7.0497575885792670e-18) -const inv_pi_1o8_d64 = ( 2.54647908947032550, -1.5742941340145989e-16) -const inv_pi_1o12_d64 = ( 3.81971863420548800, -1.4099515177158535e-17) -const inv_pi_1o16_d64 = ( 5.09295817894065100, -3.1485882680291977e-16) -const inv_pi_1o24_d64 = ( 7.63943726841097600, -2.8199030354317070e-17) -const inv_pi_1o32_d64 = ( 10.18591635788130200, -6.2971765360583950e-16) -const inv_pi_1o64_d64 = ( 20.37183271576260400, -1.2594353072116790e-15) -const inv_pi_1o128_d64 = ( 40.74366543152521000, -2.5188706144233580e-15) -const inv_pi_1o256_d64 = ( 81.48733086305042000, -5.0377412288467160e-15) const pi_4o1_d32 = (12.566371f0, -3.496911f-7) const pi_4o1_d16 = (Float16(12.56), Float16(0.00387)) @@ -71,14 +38,3 @@ pi1o2(::Type{Double16}) = Double16(pi_1o2_d16) pi1o4(::Type{Double64}) = Double64(pi_1o4_d64) pi1o4(::Type{Double32}) = Double32(pi_1o4_d32) pi1o4(::Type{Double16}) = Double16(pi_1o4_d16) - -# below is to be removed -const twopi_d64 = (6.283185307179586, 2.4492935982947064e-16) -const onepi_d64 = (3.141592653589793, 1.2246467991473532e-16) -const halfpi_d64 = (1.5707963267948966, 6.123233995736766e-17) -const qrtrpi_d64 = (0.7853981633974483, 3.061616997868383e-17) - -const inv_twopi_d64 = (0.15915494309189535, -9.839338337591243e-18) -const inv_onepi_d64 = (0.3183098861837907, -1.9678676675182486e-17) -const inv_halfpi_d64 = (0.6366197723675814, -3.935735335036497e-17) -const inv_qrtrpi_d64 = (1.2732395447351628, -7.871470670072994e-17) diff --git a/src/doubletriple/quadword.jl b/src/doubletriple/quadword.jl deleted file mode 100644 index 3af60803..00000000 --- a/src/doubletriple/quadword.jl +++ /dev/null @@ -1,119 +0,0 @@ -@inline max_min(x,y) = ifelse(abs(y) < abs(x), (x,y) , (y,x)) - -function maxtomin(a::T, b::T, c::T, d::T) where {T} - a, b = max_min(a, b) - c, d = max_min(c, d) - - a, c = max_min(a, c) - b, d = max_min(b, d) - - b, c = max_min(b, c) - - return a, b, c, d -end - -function maxtomin(a::T, b::T, c::T, d::T) where {T} - a, b = max_min(a, b) - c, d = max_min(c, d) - a, c = max_min(a, c) - b, d = max_min(b, d) - b, c = max_min(b, c) - return a, b, c, d -end - -function vec_sum(x0::T, x1::T, x2::T, x3::T) where {T} - s3 = x3 - s2, e3 = two_sum(x2, s3) - s1, e2 = two_sum(x1, s2) - s0, e1 = two_sum(x0, s1) - return s0,e1,e2,e3 -end - -function vecsum_errbranch(x::NTuple{4,T}) where {T} - y = r = e = zeros(T, 4) - j = 1 - e[1] = x[1] - for i = 1:2 - r[i], t = two_sum(e[i], x[i+1]) - if t !== zero(T) - y[j] = r[i] - e[i+1] = t - j += 1 - else - e[i+1] = r[i] - end - end - y[j], y[j+1] = two_sum(e[3], x[4]) - return y -end - -function fast_vecsum_errbranch(x::NTuple{4,T}) where {T} - y = zeros(T, 4) - j = 1 - # e[1] = x1 - # i = 1 - r, t = two_sum(x[1], x[2]) - if t !== zero(T) - y[j] = r - e = t - j += 1 - else - e = r - end - # i = 2 - r, t = two_sum(e, x[3]) - if t !== zero(T) - y[j] = r - e = t - j += 1 - else - e = r - end - - y[j], y[j+1] = two_sum(e, x[4]) - return y -end - - -function fast_vecsum_errbranch(x1::T,x2::T,x3::T,x4::T) where {T} - y = zeros(T, 4) - j = 1 - # e[1] = x1 - # i = 1 - r, t = two_sum(x1, x2) - if t !== zero(T) - y[j] = r - e = t - j += 1 - else - e = r - end - # i = 2 - r, t = two_sum(e, x3) - if t !== zero(T) - y[j] = r - e = t - j += 1 - else - e = r - end - - y[j], y[j+1] = two_sum(e, x4) - return y -end - -function quadword(x1::T, x2::T, x3::T, x4::T) where {T} - a1, a2 = two_sum(x1, x2) - b1, b2 = two_sum(x3, x4) - c1, c2 = two_sum(a1, b1) - d1, d2 = two_sum(a2, b2) - e1to4 = vec_sum(c1,c2,d1,d2) - y = vecsum_errbranch(e1to4) - return (y...,) -end - -@inline function fast_quadword(x1::T, x2::T, x3::T, x4::T) where {T} - a,b,c,d = maxtomin(x1,x2,x3,x4) - return fast_vecsum_errbranch(a,b,c,d) -end - diff --git a/src/doubletriple/triple.jl b/src/doubletriple/triple.jl index d4a014ad..c8f4538c 100644 --- a/src/doubletriple/triple.jl +++ b/src/doubletriple/triple.jl @@ -1,32 +1,3 @@ -function triple(::Type{T}, x::BigFloat) where {T<:IEEEFloat} - prec = precision(BigFloat) - setprecision(BigFloat, 768) - hi = T(x) - md = T(x - hi) - lo = T(x - hi - md) - setprecision(BigFloat, prec) - return hi, md, lo -end - -function triple(::Type{T}, x::String) where {T<:IEEEFloat} - prec = precision(BigFloat) - setprecision(BigFloat, 768) - z = parse(BigFloat, x) - hi = T(z) - md = T(z - hi) - lo = T(z - hi - md) - setprecision(BigFloat, prec) - return hi,md,lo -end - -triple64(x::BigFloat) = triple(Float64, x) -triple32(x::BigFloat) = triple(Float32, x) -triple16(x::BigFloat) = triple(Float16, x) - -triple64(x::String) = triple(Float64, x) -triple32(x::String) = triple(Float32, x) -triple16(x::String) = triple(Float16, x) - @inline function clean0s(hi::T, md::T, lo::T) where {T} if !iszero(hi) hi, md, lo @@ -45,102 +16,14 @@ end end end - -function triple_inv(::Type{T}, x::BigFloat) where {T<:IEEEFloat} - prec = precision(BigFloat) - setprecision(BigFloat, 768) - x = inv(x) - hi = T(x) - md = T(x - hi) - lo = T(x - hi - md) - setprecision(BigFloat, prec) - return hi,md,lo -end - -triple64inv(x::BigFloat) = triple_inv(Float64, x) -triple32inv(x::BigFloat) = triple_inv(Float32, x) -triple16inv(x::BigFloat) = triple_inv(Float16, x) - -#= -Basic building blocks for a triple-double intermediate format -Christoph Quirin Lauter -Thème NUM — Systèmes numériques, Projet Arénaire -Rapport de recherche n 5702 — Septembre 2005 — 67 pages -=# - -# Algorithm 3.3 [Triple-Double Building Blocks] - -function renorm(hi::T, md::T, lo::T) where {T<:AbstractFloat} - md, lo = two_sum(md, lo) - hi, m = two_sum(hi, md) - md, lo = two_sum(m, md) - return hi, md, lo -end - - -function renorm_hilo(hi::T, md::T, lo::T) where {T<:AbstractFloat} - md, lo = two_hilo_sum(md, lo) - hi, m = two_hilo_sum(hi, md) - md, lo = two_hilo_sum(m, md) - return hi, md, lo -end - -# Algorithm 4.1 (relerr 4.5 .. 16 u^2) [Triple-Double Building Blocks] - -function add222(ahilo, bhilo) - ahi, alo = ahilo - bhi, blo = bhilo - t1 = ahi + bhi - if abs(ahi) >= abs(bhi) - t2 = ahi - t1 - t3 = t2 + bhi - t4 = t3 + blo - t5 = t4 + alo - else - t2 = bhi - t1 - t3 = t2 + ahi - t4 = t3 + alo - t5 = t4 + blo - end - zhi, zlo = two_sum(t1, t5) - zhi, zlo = clean0s(zhi, zlo) - return zhi, zlo -end - """ _Basic building blocks for a triple-double intermediate format_ Christoph Quirin Lauter, 2005 +Thème NUM — Systèmes numériques, Projet Arénaire +Rapport de recherche n 5702 — Septembre 2005 — 67 pages """ TripleDouble_BuildingBlocks - -# Algorithm 4.6 (relerr 16 u^2) [Triple-Double Building Blocks] -""" - mul222( (ahi, alo), (bhi, blo) ) ↦ (chi, clo) - -### Preconditions -- abs(alo) ≤ abs(ahi) * pow(2, -53) -- abs(blo) ≤ abs(bhi) * pow(2, -53) - -### Postconditions -- abs(clo) ≤ abs(ahi) * pow(2, -53) -- chi + clo == ((ahi + alo) * (bhi + blo)) * (1 + ε), abs(ε) ≤ pow(2, -102) -- chi + clo == chi - -#### [TripleDouble_BuildingBlocks](@ref) -""" -function mul222(ahilo, bhilo) - ahi, alo = ahilo - bhi, blo = bhilo - t1, t2 = two_prod(ahi, bhi) - t3 = ahi * blo - t4 = alo * bhi - t5 = t3 + t4 - t6 = t2 + t5 - zhi, zlo = two_sum(t1, t6) - return zhi, zlo -end - # addition # Algorithm 5.1 (Add33) [Triple-Double Building Blocks] @@ -162,85 +45,6 @@ end return add333(a[1], a[2], a[3], b[1], b[2], b[3]) end -function add332(ahi::T, amd::T, alo::T, bhi::T, bmd::T, blo::T) where {T<:AbstractFloat} - zhi, t1 = two_sum(ahi, bhi) - t2, t3 = two_sum(amd, bmd) - t7, t4 = two_sum(t1, t2) - t6 = alo + blo - t5 = t3 + t4 - zmd = t5 + t6 - zmd += t7 - zhi, zmd = clean0s(zhi, zmd) - return zhi, zmd -end - -@inline function add332(a::Tuple{T,T,T}, b::Tuple{T,T,T}) where {T<:AbstractFloat} - return add332(a[1], a[2], a[3], b[1], b[2], b[3]) -end - -function add323(ahi::T, amd::T, alo::T, bhi::T, blo::T) where {T<:AbstractFloat} - zhi, t1 = two_sum(ahi, bhi) - t2, t3 = two_sum(amd, blo) - t7, t4 = two_sum(t1, t2) - t5 = t3 + t4 - t8 = t5 + alo - zmd, zlo = two_sum(t7, t8) - zhi, zmd, zlo = clean0s(zhi,zmd,zlo) - return zhi, zmd, zlo -end - -@inline function add323(a::Tuple{T,T,T}, b::Tuple{T,T}) where {T<:AbstractFloat} - return add323(a[1], a[2], a[3], b[1], b[2]) -end - -function add322(ahi::T, amd::T, alo::T, bhi::T, blo::T) where {T<:AbstractFloat} - zhi, t1 = two_sum(ahi, bhi) - t2, t3 = two_sum(amd, blo) - t7, t4 = two_sum(t1, t2) - t5 = t3 + t4 - zmd = t5 + alo - zmd += t7 - zhi, zmd = clean0s(zhi, zmd) - return zhi, zmd -end - -@inline function add322(a::Tuple{T,T,T}, b::Tuple{T,T}) where {T<:AbstractFloat} - return add322(a[1], a[2], a[3], b[1], b[2]) -end - -function add223(ahi::T, amd::T, bhi::T, bmd::T) where {T<:AbstractFloat} - zhi, t1 = two_sum(ahi, bhi) - t2, t3 = two_sum(amd, bmd) - t7, t4 = two_sum(t1, t2) - t5 = t3 + t4 - zmd, zlo = two_sum(t7, t5) - zhi, zmd, zlo = clean0s(zhi,zmd,zlo) - return zhi, zmd, zlo -end - -@inline function add223(a::Tuple{T,T}, b::Tuple{T,T}) where {T<:AbstractFloat} - return add223(a[1], a[2], b[1], b[2]) -end - - -function add313(ahi::T, amd::T, alo::T, b::T) where {T<:AbstractFloat} - zhi, t1 = two_sum(ahi, b) - t7, t4 = two_sum(t1, amd) - t8 = t4 + alo - zmd, zlo = two_sum(t7, t8) - zhi, zmd, zlo = clean0s(zhi,zmd,zlo) - return zhi, zmd, zlo -end - -add133(b::T, ahi::T, amd::T, alo::T) where {T<:AbstractFloat} = add313(ahi, amd, alo, b) - -add313(a::NTuple{3,T}, b::NTuple{1,T}) where {T<:AbstractFloat} = - add313(a[1], a[2], a[3], b[1]) - -add133(b::NTuple{1,T}, a::NTuple{3,T}) where {T<:AbstractFloat} = - add313(a[1], a[2], a[3], b[1]) - - # subtraction function sub333(ahi::T, amd::T, alo::T, bhi::T, bmd::T, blo::T) where {T<:AbstractFloat} @@ -259,67 +63,6 @@ end return sub333(a[1], a[2], a[3], b[1], b[2], b[3]) end -function sub332(ahi::T, amd::T, alo::T, bhi::T, bmd::T, blo::T) where {T<:AbstractFloat} - zhi, t1 = two_diff(ahi, bhi) - t2, t3 = two_diff(amd, bmd) - t7, t4 = two_sum(t1, t2) - t6 = alo - blo - t5 = t3 + t4 - t8 = t5 + t6 - zmd = t7 + t8 - zhi, zmd = clean0s(zhi, zmd) - return zhi, zmd -end - -@inline function sub332(a::Tuple{T,T,T}, b::Tuple{T,T,T}) where {T<:AbstractFloat} - return sub332(a[1], a[2], a[3], b[1], b[2], b[3]) -end - -function sub323(ahi::T, amd::T, alo::T, bhi::T, bmd::T) where {T<:AbstractFloat} - zhi, t1 = two_diff(ahi, bhi) - t2, t3 = two_diff(amd, bmd) - t7, t4 = two_sum(t1, t2) - t5 = t3 + t4 - t8 = t5 + alo - zmd, zlo = two_sum(t7, t8) - zhi, zmd, zlo = clean0s(zhi,zmd,zlo) - return zhi, zmd, zlo -end - -@inline function sub323(a::Tuple{T,T,T}, b::Tuple{T,T}) where {T<:AbstractFloat} - return sub323(a[1], a[2], a[3], b[1], b[2]) -end - -function sub322(ahi::T, amd::T, alo::T, bhi::T, bmd::T) where {T<:AbstractFloat} - zhi, t1 = two_diff(ahi, bhi) - t2, t3 = two_diff(amd, bmd) - t7, t4 = two_sum(t1, t2) - t5 = t3 + t4 - zmd = t5 + alo - zmd += t7 - zhi, zmd = clean0s(zhi, zmd) - return zhi, zmd -end - -@inline function sub322(a::Tuple{T,T,T}, b::Tuple{T,T}) where {T<:AbstractFloat} - return sub322(a[1], a[2], a[3], b[1], b[2]) -end - -function sub233(ahi::T, amd::T, bhi::T, bmd::T, blo::T) where {T<:AbstractFloat} - zhi, t1 = two_diff(ahi, bhi) - t2, t3 = two_diff(amd, bmd) - t7, t4 = two_sum(t1, t2) - t5 = t3 + t4 - t8 = t5 - blo - zmd, zlo = two_sum(t7, t8) - zhi, zmd, zlo = clean0s(zhi,zmd,zlo) - return zhi, zmd, zlo -end - -@inline function sub233(a::Tuple{T,T}, b::Tuple{T,T,T}) where {T<:AbstractFloat} - return sub233(a[1], a[2], b[1], b[2], b[3]) -end - function sub232(ahi::T, amd::T, bhi::T, bmd::T, blo::T) where {T<:AbstractFloat} zhi, t1 = two_diff(ahi, bhi) t2, t3 = two_diff(amd, bmd) @@ -335,64 +78,6 @@ end return sub232(a[1], a[2], b[1], b[2], b[3]) end -function sub223(ahi::T, amd::T, bhi::T, bmd::T) where {T<:AbstractFloat} - zhi, t1 = two_diff(ahi, bhi) - t2, t3 = two_diff(amd, bmd) - t7, t4 = two_sum(t1, t2) - t5 = t3 + t4 - zmd, zlo = two_sum(t7, t5) - zhi, zmd, zlo = clean0s(zhi,zmd,zlo) - return zhi, zmd, zlo -end - -@inline function sub223(a::Tuple{T,T}, b::Tuple{T,T}) where {T<:AbstractFloat} - return sub223(a[1], a[2], b[1], b[2]) -end - - -function sub313(ahi::T, amd::T, alo::T, b::T) where {T<:AbstractFloat} - zhi, t1 = two_diff(ahi, b) - t7, t4 = two_sum(t1, amd) - t8 = t4 + alo - zmd, zlo = two_sum(t7, t8) - zhi, zmd, zlo = clean0s(zhi,zmd,zlo) - return zhi, zmd, zlo -end - -function sub312(ahi::T, amd::T, alo::T, b::T) where {T<:AbstractFloat} - zhi, t1 = two_diff(ahi, b) - t7, t4 = two_sum(t1, amd) - zlo = t4 + alo - zlo += t7 - zhi, zlo = clean0s(zhi, zlo) - return zhi, zlo -end - -sub313(a::NTuple{3,T}, b::NTuple{1,T}) where {T<:AbstractFloat} = - sub313(a[1], a[2], a[3], b) - -sub312(a::NTuple{3,T}, b::NTuple{1,T}) where {T<:AbstractFloat} = - sub312(a[1], a[2], a[3], b) - - -# Algorithm 6.1 [Triple-Double Building Blocks] -# (ahi,alo) * (bhi,blo) :: (zhi,zmd,zlo) - -function mul223(ahi::T, alo::T, bhi::T, blo::T) where {T<:AbstractFloat} - zhi, t1 = two_prod(ahi, bhi) - t2, t3 = two_prod(ahi, blo) - t4, t5 = two_prod(alo, bhi) - t6 = alo * blo - t7, t8 = two_sum(t2, t3, t4, t5) - t9, t10 = two_sum(t1, t6) - zmd, zlo = two_sum(t7, t8, t9, t10) - return zhi, zmd, zlo -end - -@inline function mul223(a::Tuple{T,T}, b::Tuple{T,T}) where {T<:AbstractFloat} - return mul223(a[1], a[2], b[1], b[2]) -end - function mul323(a::Tuple{T,T,T}, b::Tuple{T,T}) where {T<:AbstractFloat} ahi, amd, alo = a bhi, blo = b @@ -470,10 +155,6 @@ mul322(a::Tuple{Float64, Float64, Float64}, b::Tuple{Float32, Float32}) = mul322(a::Tuple{Float32, Float32, Float32}, b::Tuple{Float64, Float64}) = mul322((Float64(a[1]), Float64(a[2]), Float64(a[3])), b) - -mul233(a::Tuple{T,T}, b::Tuple{T,T,T}) where {T<:AbstractFloat} = - mul323(b,a) - mul232(a::Tuple{T,T}, b::Tuple{T,T,T}) where {T<:AbstractFloat} = mul322(b,a) @@ -512,101 +193,3 @@ mul333(a::Tuple{Float64, Float64, Float64}, b::Tuple{Float32, Float32, Float32}) mul333(a, (Float64(b[1]), Float64(b[2]), Float64(b[3])) ) mul333(a::Tuple{Float32, Float32, Float32}, b::Tuple{Float64, Float64, Float64}) = mul333((Float64(a[1]), Float64(a[2]), Float64(a[3])), b) - - -function mul332(a::Tuple{T,T,T}, b::Tuple{T,T,T}) where {T<:AbstractFloat} - ahi, amd, alo = a - bhi, bmd, blo = b - - hi,t1 = two_prod(ahi, bhi) - t2,t3 = two_prod(ahi, bmd) - t4,t5 = two_prod(amd, bhi) - t6,t7 = two_prod(amd, bmd) - - t8 = ahi * blo - t9 = alo * bhi - t10 = amd * blo - t11 = alo * bmd - - t14, lo = two_hilo_sum(t1, t6) - - lo += t7 - lo += t8 + t9 - lo += t10 + t11 - lo += t16 + t17 - lo += t14 - lo += t5 + t4 - lo += t3 + t2 - - return hi, lo -end - -mul332(a::Tuple{Float64, Float64, Float64}, b::Tuple{Float32, Float32, Float32}) = - mul332(a, (Float64(b[1]), Float64(b[2]), Float64(b[3])) ) -mul332(a::Tuple{Float32, Float32, Float32}, b::Tuple{Float64, Float64, Float64}) = - mul332((Float64(a[1]), Float64(a[2]), Float64(a[3])), b) - -function mul313(a::Tuple{T,T,T}, b::Tuple{T}) where {T<:AbstractFloat} - ahi, amd, alo = a - bhi = b[1] - p0,q0 = two_prod(ahi, bhi) - p2,q2 = two_prod(amd, bhi) - p5,q5 = two_prod(alo, bhi) - - # Start Accumulation - p1,p2 = two_sum(p2, q0) - - p2,q1 = two_sum(p2, q2) - - s0,t0 = two_sum(p2, p5) - s1,t0 = two_sum(q1, t0) - - # O(eps^3) order terms - s1 += q5 - #p0,p1,s0 = renormAs3(p0, p1, s0, s1+s2) - s1 += t0 - s0,s1 = two_hilo_sum(s0,s1) - p1,s0 = two_hilo_sum(p1,s0) - p0,p1 = two_hilo_sum(p0,p1) - - return p0,p1,s0 -end - - -mul133(a::NTuple{1,T}, b::NTuple{3,T}) where {T<:AbstractFloat} = mul313(b, a) - - -function mul312(a::Tuple{T,T,T}, b::Tuple{T}) where {T<:AbstractFloat} - ahi, amd, alo = a - bhi = b[1] - p0,q0 = two_prod(ahi, bhi) - p2,q2 = two_prod(amd, bhi) - p5,q5 = two_prod(alo, bhi) - - # Start Accumulation - p1,p2 = two_sum(p2, q0) - - p2,q1 = two_sum(p2, q2) - - s0,t0 = two_sum(p2, p5) - s1,t0 = two_sum(q1, t0) - - # O(eps^3) order terms - s1 += q5 - #p0,p1,s0 = renormAs3(p0, p1, s0, s1+s2) - s1 += t0 - p1 = p1 + (s0 + s1) - p0,p1 = two_hilo_sum(p0,p1) - - return p0,p1 -end - - -mul132(a::NTuple{1,T}, b::NTuple{3,T}) where {T<:AbstractFloat} = mul312(b, a) - - -function muladd222(a::Tuple{T,T}, b::Tuple{T,T}, c::Tuple{T,T}) where {T<:AbstractFloat} - m = mul223(a, b) - hi, lo = add322(m, c) - return hi, lo -end diff --git a/src/doubletriple/triple_consts.jl b/src/doubletriple/triple_consts.jl index 03b2149c..5a59ac37 100644 --- a/src/doubletriple/triple_consts.jl +++ b/src/doubletriple/triple_consts.jl @@ -1,163 +1,9 @@ -const one_t64 = (1.0, 0.0, 0.0) - -const pi_4o1_t64 = (12.56637061435917200, 4.8985871965894130e-16, -1.1979079238873359e-32 ) const pi_2o1_t64 = ( 6.28318530717958600, 2.4492935982947064e-16, -5.9895396194366790e-33 ) const pi_1o1_t64 = ( 3.14159265358979300, 1.2246467991473532e-16, -2.9947698097183397e-33 ) const pi_1o2_t64 = ( 1.57079632679489660, 6.1232339957367660e-17, -1.4973849048591698e-33 ) -const pi_15o32_t64 = ( 1.47262155637021560, -5.3616983752483473e-17, -6.3342637055057730e-34 ) -const pi_7o16_t64 = ( 1.37444678594553450, 5.3578297462696700e-17, 2.3053216375801512e-34 ) -const pi_1o3_t64 = ( 1.04719755119659790, -1.0720817664510910e-16, -9.9825660323944640e-34 ) const pi_1o4_t64 = ( 0.78539816339744830, 3.0616169978683830e-17, -7.4869245242958490e-34 ) -const pi_1o6_t64 = ( 0.52359877559829890, -5.3604088322554550e-17, -4.9912830161972320e-34 ) -const pi_1o8_t64 = ( 0.39269908169872414, 1.5308084989341915e-17, -3.7434622621479246e-34 ) -const pi_1o12_t64 = ( 0.26179938779914946, -2.6802044161277275e-17, -2.4956415080986160e-34 ) -const pi_1o16_t64 = ( 0.19634954084936207, 7.6540424946709580e-18, -1.8717311310739623e-34 ) -const pi_1o24_t64 = ( 0.13089969389957473, -1.3401022080638637e-17, -1.2478207540493080e-34 ) -const pi_1o32_t64 = ( 0.09817477042468103, 3.8270212473354790e-18, -9.3586556553698110e-35 ) -const pi_1o64_t64 = ( 0.04908738521234052, 1.9135106236677394e-18, -4.6793278276849057e-35 ) -const pi_1o128_t64 = ( 0.02454369260617026, 9.5675531183386970e-19, -2.3396639138424529e-35 ) -const pi_1o256_t64 = ( 0.01227184630308513, 4.7837765591693480e-19, -1.1698319569212264e-35 ) -const pi_1o512_t64 = ( 0.006135923151542565, 2.3918882795846740e-19, -5.8491597846061320e-36 ) -const pi_1o1024_t64 = ( 0.0030679615757712823, 1.195944139792337e-19, -2.9245798923030660e-36 ) -const pi_1o2048_t64 = ( 0.0015339807878856412, 5.979720698961686e-20, -1.4622899461515330e-36 ) -const inv_pi_4o1_t64 = ( 0.07957747154594767, -4.9196691687956215e-18, -2.6803590707232510e-34 ) const inv_pi_2o1_t64 = ( 0.15915494309189535, -9.8393383375912430e-18, -5.3607181414465020e-34 ) const inv_pi_1o1_t64 = ( 0.31830988618379070, -1.9678676675182486e-17, -1.0721436282893004e-33 ) const inv_pi_1o2_t64 = ( 0.63661977236758140, -3.9357353350364970e-17, -2.1442872565786008e-33 ) -const inv_pi_15o32_t64 = ( 0.67906109052542010, 2.4632204570453420e-17, 9.9968069807037510e-34 ) -const inv_pi_7o16_t64 = ( 0.72756545413437870, 2.6011543692324530e-18, 1.3563477494445884e-34 ) -const inv_pi_1o3_t64 = ( 0.95492965855137200, -3.5248787942896340e-18, -1.3494297384832360e-34 ) const inv_pi_1o4_t64 = ( 1.27323954473516280, -7.8714706700729940e-17, -4.2885745131572016e-33 ) -const inv_pi_1o6_t64 = ( 1.90985931710274400, -7.0497575885792670e-18, -2.6988594769664720e-34 ) -const inv_pi_1o8_t64 = ( 2.54647908947032550, -1.5742941340145989e-16, -8.5771490263144030e-33 ) -const inv_pi_1o12_t64 = ( 3.81971863420548800, -1.4099515177158535e-17, -5.3977189539329440e-34 ) -const inv_pi_1o16_t64 = ( 5.09295817894065100, -3.1485882680291977e-16, -1.7154298052628806e-32 ) -const inv_pi_1o24_t64 = ( 7.63943726841097600, -2.8199030354317070e-17, -1.0795437907865888e-33 ) -const inv_pi_1o32_t64 = ( 10.18591635788130200, -6.2971765360583950e-16, -3.4308596105257613e-32 ) -const inv_pi_1o64_t64 = ( 20.37183271576260400, -1.2594353072116790e-15, -6.8617192210515230e-32 ) -const inv_pi_1o128_t64 = ( 40.74366543152521000, -2.5188706144233580e-15, -1.3723438442103045e-31 ) -const inv_pi_1o256_t64 = ( 81.48733086305042000, -5.0377412288467160e-15, -2.7446876884206090e-31 ) -const inv_pi_1o512_t64 = ( 162.97466172610083000, -1.0075482457693433e-14, -5.4893753768412180e-31 ) -const inv_pi_1o1024_t64 = ( 325.94932345220167000, -2.0150964915386866e-14, -1.0978750753682436e-30 ) -const inv_pi_1o2048_t64 = ( 651.89864690440330000, -4.0301929830773730e-14, -2.1957501507364872e-30 ) - - -const sin_pi_1o3_t64 = (0.8660254037844386, 5.0175421109034514e-17, -7.479771237866948e-34) -const cos_pi_1o3_t64 = (0.5, 0.0, 0.0) -const tan_pi_1o3_t64 = (1.7320508075688772, 1.0035084221806903e-16, -1.4959542475733896e-33) -const csc_pi_1o3_t64 = (1.1547005383792515, 6.690056147871269e-17, -5.105953379741696e-33) -const sec_pi_1o3_t64 = (2.0, 0.0, 0.0) -const cot_pi_1o3_t64 = (0.5773502691896257, 3.3450280739356345e-17, -2.552976689870848e-33) - -const sin_pi_1o4_t64 = (0.7071067811865476, -4.833646656726457e-17, 2.0693376543497068e-33) -const cos_pi_1o4_t64 = (0.7071067811865476, -4.833646656726457e-17, 2.0693376543497068e-33) -const tan_pi_1o4_t64 = (1.0, 0.0, 0.0) -const csc_pi_1o4_t64 = (1.4142135623730951, -9.667293313452913e-17, 4.1386753086994136e-33) -const sec_pi_1o4_t64 = (1.4142135623730951, -9.667293313452913e-17, 4.1386753086994136e-33) -const cot_pi_1o4_t64 = (1.0, 0.0, 0.0) - -const sin_pi_1o5_t64 = (0.5877852522924731, -7.93475083819002e-18, 3.2552371219323954e-34) -const cos_pi_1o5_t64 = (0.8090169943749475, -2.716057601841253e-17, 1.3271626041907827e-33) -const tan_pi_1o5_t64 = (0.7265425280053609, -1.3283798985961028e-17, 1.1900616500893826e-34) -const csc_pi_1o5_t64 = (1.7013016167040798, 1.0886984134897833e-16, -1.0200333713443962e-33) -const sec_pi_1o5_t64 = (1.2360679774997898, -1.0864230407365012e-16, 5.308650416763131e-33) -const cot_pi_1o5_t64 = (1.3763819204711736, -4.287034122518378e-17, 9.00390429616504e-34) - -const sin_pi_1o6_t64 = (0.5, 0.0, 0.0) -const cos_pi_1o6_t64 = (0.8660254037844386, 5.0175421109034514e-17, -7.479771237866948e-34) -const tan_pi_1o6_t64 = (0.5773502691896257, 3.3450280739356345e-17, -2.552976689870848e-33) -const csc_pi_1o6_t64 = (2.0, 0.0, 0.0) -const sec_pi_1o6_t64 = (1.1547005383792515, 6.690056147871269e-17, -5.105953379741696e-33) -const cot_pi_1o6_t64 = (1.7320508075688772, 1.0035084221806903e-16, -1.4959542475733896e-33) - -const sin_pi_1o7_t64 = (0.4338837391175581, 7.407189078946677e-20, -1.2028582333265511e-36) -const cos_pi_1o7_t64 = (0.9009688679024191, -1.9762646853069492e-17, 9.897202619545834e-34) -const tan_pi_1o7_t64 = (0.48157461880752866, -2.056669142326466e-17, -3.854197091963584e-34) -const csc_pi_1o7_t64 = (2.3047648709624866, -1.062197674057547e-16, 4.368038423415315e-33) -const sec_pi_1o7_t64 = (1.1099162641747424, -4.564997930888251e-17, -5.941617871131375e-34) -const cot_pi_1o7_t64 = (2.0765213965723364, 2.0289117907376653e-16, -9.25927028463036e-33) - -const sin_pi_1o8_t64 = (0.3826834323650898, -1.0050772696461588e-17, -2.0605316302806695e-34) -const cos_pi_1o8_t64 = (0.9238795325112867, 1.7645047084336677e-17, -5.044253732158682e-34) -const tan_pi_1o8_t64 = (0.41421356237309503, 1.4349369327986523e-17, 1.0571873976798362e-33) -const csc_pi_1o8_t64 = (2.613125929752753, -9.583375368676548e-17, -1.4209570724878703e-33) -const sec_pi_1o8_t64 = (1.082392200292394, -5.563066290091912e-17, -5.9674442037560255e-34) -const cot_pi_1o8_t64 = (2.414213562373095, 1.2537167179050217e-16, 4.1386753086994136e-33) - -const sin_pi_1o12_t64 = (0.25881904510252074, 2.287249500495561e-17, 1.0253436680177266e-33) -const cos_pi_1o12_t64 = (0.9659258262890683, -2.5463971562308955e-17, 1.3193411347856062e-35) -const tan_pi_1o12_t64 = (0.2679491924311227, 1.0671460244446628e-17, -4.4789707936399173e-35) -const csc_pi_1o12_t64 = (3.8637033051562732, -1.0185588624923582e-16, 5.277364539142425e-35) -const sec_pi_1o12_t64 = (1.035276180410083, 9.148998001982243e-17, 4.1013746720709066e-33) -const cot_pi_1o12_t64 = (3.732050807568877, 1.0035084221806903e-16, -1.4959542475733896e-33) - -const sin_pi_1o15_t64 = (0.20791169081775934, -5.47375691962595e-18, -2.1547502853552625e-35) -const cos_pi_1o15_t64 = (0.9781476007338057, -5.0904377976839195e-17, -1.154640558932597e-33) -const tan_pi_1o15_t64 = (0.21255656167002213, -8.27484570190349e-19, 4.655808820295626e-36) -const csc_pi_1o15_t64 = (4.809734344744131, -5.569426258316774e-17, 4.547378632737873e-33) -const sec_pi_1o15_t64 = (1.0223405948650293, 1.1456987869876802e-17, 5.239963357056466e-34) -const cot_pi_1o15_t64 = (4.704630109478455, -3.8014994127817193e-16, 9.374806675690387e-33) - -const sin_pi_1o16_t64 = (0.19509032201612828, -7.991079068461731e-18, 6.184627002422071e-34) -const cos_pi_1o16_t64 = (0.9807852804032304, 1.8546939997825006e-17, -1.0696564445530757e-33) -const tan_pi_1o16_t64 = (0.198912367379658, 8.391794477636538e-19, 2.5564457535700006e-35) -const csc_pi_1o16_t64 = (5.125830895483013, -3.8148564901441474e-16, -1.4181052667649092e-32) -const sec_pi_1o16_t64 = (1.0195911582083184, -2.2083968146912082e-17, -1.4352691750559562e-33) -const cot_pi_1o16_t64 = (5.027339492125848, 2.95379181037367e-17, 2.7177182362115432e-33) - -const sin_pi_1o24_t64 = (0.1305261922200516, -9.607666023901524e-18, -6.716953849066469e-34) -const cos_pi_1o24_t64 = (0.9914448613738104, 2.898989874454537e-17, -5.34967350433339e-34) -const tan_pi_1o24_t64 = (0.13165249758739586, -7.917699157902454e-18, 7.983937455025082e-36) -const csc_pi_1o24_t64 = (7.661297575540389, -2.5754942653453446e-16, 2.049433239355979e-32) -const sec_pi_1o24_t64 = (1.0086289605801526, 1.092498757722754e-16, 1.8625688875143196e-33) -const cot_pi_1o24_t64 = (7.59575411272515, -1.505044031166795e-18, -9.502964111090016e-35) - -const sin_pi_1o36_t64 = (0.08715574274765818, -6.189574214131301e-18, -2.9880893964585044e-34) -const cos_pi_1o36_t64 = (0.9961946980917455, -1.2903694855897886e-17, -2.0850330180951652e-34) -const tan_pi_1o36_t64 = (0.08748866352592401, -1.7475065678744648e-18, -6.573564498151167e-35) -const csc_pi_1o36_t64 = (11.473713245669854, 8.409824359888741e-16, -4.2775072900091694e-32) -const sec_pi_1o36_t64 = (1.0038198375433474, 2.988608889329276e-17, -1.592359231061579e-33) -const cot_pi_1o36_t64 = (11.430052302761343, 8.11514658899593e-17, -7.42810733044527e-34) - -const sin_pi_1o48_t64 = (0.06540312923014306, 5.136633854723179e-18, 1.8634981210190512e-34) -const cos_pi_1o48_t64 = (0.9978589232386035, 2.7660245814577704e-17, 1.4578690036224136e-33) -const tan_pi_1o48_t64 = (0.06554346281523823, -6.244201962707419e-18, 3.670976186047138e-34) -const csc_pi_1o48_t64 = (15.289788298678511, 7.267038125576664e-16, 2.6974131728868103e-32) -const sec_pi_1o48_t64 = (1.0021456708072995, 6.035414166361829e-17, 2.9457074038261743e-33) -const cot_pi_1o48_t64 = (15.25705168826554, -2.5905447056570125e-16, 1.9051151791377825e-32) - -const sin_pi_1o64_t64 = (0.049067674327418015, -6.79610372051828e-19, -4.4318868124718325e-35) -const cos_pi_1o64_t64 = (0.9987954562051724, -1.2291693337075465e-17, 2.446844678649127e-34) -const tan_pi_1o64_t64 = (0.049126849769467254, 9.097765655528944e-20, -4.7470025699432424e-36) -const csc_pi_1o64_t64 = (20.380016247096112, 1.6969944228826224e-15, -2.1307108889014292e-32) -const sec_pi_1o64_t64 = (1.0012059964703925, 5.748536721404889e-17, 2.6420799527691316e-33) -const cot_pi_1o64_t64 = (20.355467624987188, -2.3792881581892444e-17, 5.288182696219774e-34) - -const sin_pi_1o128_t64 = (0.024541228522912288, -9.186849012577878e-20, 4.870614870446706e-36) -const cos_pi_1o128_t64 = (0.9996988186962042, -2.985148640379975e-17, -1.9084787370733737e-33) -const tan_pi_1o128_t64 = (0.024548622108925444, -5.838370447784443e-20, -6.964141459827823e-37) -const csc_pi_1o128_t64 = (40.74775633446287, -2.5217154537342434e-15, -1.4049982810542148e-31) -const sec_pi_1o128_t64 = (1.000301272041302, -6.001229236340269e-17, -3.315945536296742e-33) -const cot_pi_1o128_t64 = (40.7354838720833, 1.6732015413007298e-15, 6.550337088915585e-32) - -const sin_pi_1o512_t64 = (0.006135884649154475, 9.054525748247493e-20, 1.626011313374532e-37) -const cos_pi_1o512_t64 = (0.9999811752826011, 3.3568103522895585e-17, -1.4740132559368063e-35) -const tan_pi_1o512_t64 = (0.006136000157623402, -1.746345263565922e-19, 4.277381971459752e-36) -const csc_pi_1o512_t64 = (162.9756843844514, -1.222532369546018e-14, 1.3070853382863645e-31) -const sec_pi_1o512_t64 = (1.0000188250717754, 9.449694210745273e-17, -7.377442746575774e-34) -const cot_pi_1o512_t64 = (162.97261641324997, -3.0164596628737455e-15, -1.55236796149015e-31) - -const sin_pi_1o1024_t64 = (0.003067956762965976, 1.2690279085455925e-19, 5.287946424532839e-36) -const cos_pi_1o1024_t64 = (0.9999952938095762, -1.966806428532219e-17, -6.305395509588348e-34) -const tan_pi_1o1024_t64 = (0.003067971201422665, -8.446036355314405e-20, 4.918546909029024e-36) -const csc_pi_1o1024_t64 = (325.9498347796924, 1.4061463896494086e-14, 4.991692857301745e-31) -const sec_pi_1o1024_t64 = (1.000004706212572, 6.900021265655887e-17, -4.061965915398279e-33) -const cot_pi_1o1024_t64 = (325.94830079770134, 1.3179926072070083e-14, -2.4528262320378532e-32) - -const sin_pi_1o2048_t64 = (0.0015339801862847657, -1.0467712971596958e-19, 2.4745604308673836e-36) -const cos_pi_1o2048_t64 = (0.9999988234517019, 3.067881716126108e-17, -2.6247039963835992e-33) -const tan_pi_1o2048_t64 = (0.0015339819910886664, 8.145819328928074e-20, 1.0849852064036664e-36) -const csc_pi_1o2048_t64 = (651.8989025679381, 4.950041690164551e-14, 1.4246444414492298e-30) -const sec_pi_1o2048_t64 = (1.0000011765496823, 1.0780476438354186e-17, -2.862837848637511e-35) -const cot_pi_1o2048_t64 = (651.8981355773938, 2.724138996856417e-14, 4.74641023409796e-31) diff --git a/src/doubletriple/triple_pi.jl b/src/doubletriple/triple_pi.jl index 93d62472..e13aad0d 100644 --- a/src/doubletriple/triple_pi.jl +++ b/src/doubletriple/triple_pi.jl @@ -1,26 +1,6 @@ """ - twopi_minus_x(x) is 2π - x -""" twopi_minus_x - -""" - onepi_minus_x(x) is π - x -""" onepi_minus_x - -""" - halfpi_minus_x(x) is π/2 - x -""" halfpi_minus_x - -""" - qrtrpi_minus_x(x) is π/4 - x -""" qrtrpi_minus_x - -""" - x_minus_twopi(x) is x - 2π -""" x_minus_twopi - -""" - x_minus_pi(x) is x - π -""" x_minus_pi + x_minus_onepi(x) is x - π +""" x_minus_onepi """ x_minus_halfpi(x) is x - π/2 @@ -30,68 +10,10 @@ x_minus_qrtrpi(x) is x - π/4 """ x_minus_qrtrpi - -twopi_minus_x(x::DoubleFloat{Float64}) = twopi_minus_x(HI(x), LO(x)) -onepi_minus_x(x::DoubleFloat{Float64}) = onepi_minus_x(HI(x), LO(x)) -halfpi_minus_x(x::DoubleFloat{Float64}) = halfpi_minus_x(HI(x), LO(x)) -qrtrpi_minus_x(x::DoubleFloat{Float64}) = qrtrpi_minus_x(HI(x), LO(x)) - -x_minus_twopi(x::DoubleFloat{Float64}) = x_minus_twopi(HI(x), LO(x)) x_minus_onepi(x::DoubleFloat{Float64}) = x_minus_onepi(HI(x), LO(x)) x_minus_halfpi(x::DoubleFloat{Float64}) = x_minus_halfpi(HI(x), LO(x)) x_minus_qrtrpi(x::DoubleFloat{Float64}) = x_minus_qrtrpi(HI(x), LO(x)) -function twopi_minus_x(hi::T, lo::T) where {T<:Float64} - zhi, t1 = two_diff(6.28318530717958600, hi) - t2, t3 = two_diff(2.4492935982947064e-16, lo) - t7, t4 = two_sum(t1, t2) - t5 = t3 + t4 - zlo = t5 - 5.9895396194366790e-33 - zlo += t7 - return DoubleFloat{Float64}(zhi, zlo) -end - -function onepi_minus_x(hi::T, lo::T) where {T<:Float64} - zhi, t1 = two_diff(3.14159265358979300, hi) - t2, t3 = two_diff(1.2246467991473532e-16, lo) - t7, t4 = two_sum(t1, t2) - t5 = t3 + t4 - zlo = t5 - 2.9947698097183397e-33 - zlo += t7 - return DoubleFloat{Float64}(zhi, zlo) -end - -function halfpi_minus_x(hi::T, lo::T) where {T<:Float64} - zhi, t1 = two_diff(1.57079632679489660, hi) - t2, t3 = two_diff(6.1232339957367660e-17, lo) - t7, t4 = two_sum(t1, t2) - t5 = t3 + t4 - zlo = t5 - 1.4973849048591698e-33 - zlo += t7 - return DoubleFloat{Float64}(zhi, zlo) -end - -function qrtrpi_minus_x(hi::T, lo::T) where {T<:Float64} - zhi, t1 = two_diff(0.78539816339744830, hi) - t2, t3 = two_diff(3.0616169978683830e-17, lo) - t7, t4 = two_sum(t1, t2) - t5 = t3 + t4 - zlo = t5 - 7.4869245242958490e-34 - zlo += t7 - return DoubleFloat{Float64}(zhi, zlo) -end - - -function x_minus_twopi(hi::T, lo::T) where {T<:Float64} - zhi, t1 = two_diff(6.28318530717958600, hi) - t2, t3 = two_diff(2.4492935982947064e-16, lo) - t7, t4 = two_sum(t1, t2) - t5 = t3 + t4 - zlo = t5 - 5.9895396194366790e-33 - zlo += t7 - return DoubleFloat{Float64}(-zhi, -zlo) -end - function x_minus_onepi(hi::T, lo::T) where {T<:Float64} zhi, t1 = two_diff(3.14159265358979300, hi) t2, t3 = two_diff(1.2246467991473532e-16, lo) diff --git a/src/doubletriple/tripleword.jl b/src/doubletriple/tripleword.jl deleted file mode 100644 index b234b82c..00000000 --- a/src/doubletriple/tripleword.jl +++ /dev/null @@ -1,150 +0,0 @@ - -#= - Algorithms for triple-word arithmetic - Nicolas Fabiano, Jean-Michel Muller, Joris Picot -=# - -function vec_sum(x0::T, x1::T, x2::T) where {T} - s1, e2 = two_sum(x1, x2) - s0, e1 = two_sum(x0, s1) - return s0,e1,e2 -end - - -function vecsum(x0::T,x1::T,x2::T) where {T} - iszero(x0) && return (two_sum(x1, x2)..., 0.0) - s1, e2 = two_sum(x1, x2) - e0, e1 = two_sum(x0, s1) - return (e0, e1, e2) -end - -function vseb(a0::T, a1::T, a2::T, a3::T, a4::T, a5::T) where {T} - iszero(a2) && return (a0, a1, a2) - r0, e1 = two_sum(a0, a1) - if e1 != 0.0 - y0 = r0 - else - e1 = r0 - end - y1, y2 = two_sum(e1, a2) - return (y0, y1, y2) -end - -function vseb(a0::T, a1::T, a2::T) where {T} - iszero(a2) && return (a0, a1, a2) - r0, e1 = two_sum(a0, a1) - if e1 != 0.0 - y0 = r0 - else - e1 = r0 - end - y1, y2 = two_sum(e1, a2) - return (y0, y1, y2) -end - -function vecseb(x0::T, x1::T, x2::T) where {T} - a0, a1, a2 = vecsum(x0, x1, x2) - return vseb(a0, a1, a2) -end - -function tripleword(a::T, b::T, c::T) where {T} - d0, d1 = two_sum(a,b) - # e0, e1, e2 = vecsum(d0, d1, c) - s1, e2 = two_sum(d1, c) - e0, e1 = two_sum(d0, s1) - # s0, s1, s2 = vseb(e0, e1, e2) - r0, e1 = two_sum(e0, e1) - if e1 != 0.0 - s0 = r0 - else - e1 = r0 - end - s1, s2 = two_sum(e1, e2) - return (s0, s1, s2) -end - -using SetRounding - -function Float64(x0::Float64, x1::Float64, x2::Float64) - s, e = two_hilo_sum(x0, x1+x1) - if e !== 0.0 - y = x0 + x1 - elseif x2 > 0.0 - setrounding(Float64, RoundUp) - y = x0 + x1 - setrounding(Float64, RoundNearest) - elseif x2 < 0.0 - setrounding(Float64, RoundDown) - y = x0 + x1 - setrounding(Float64, RoundNearest) - else - y = x0 + x1 - end - return y -end - - -@inline max_min(x,y) = ifelse(abs(y) < abs(x), (x,y) , (y,x)) - -function maxtomin(a::T, b::T, c::T) where {T} - b, c = max_min(b, c) - a, c = max_min(a, c) - a, b = max_min(a, b) - return a, b, c -end - -function vec_sum(x1::T, x2::T, x3::T) where {T} - s3 = x3 - s2, e3 = two_sum(x2, s3) - s1, e2 = two_sum(x1, s2) - return s1,e2,e3 -end - -function vecsum_errbranch(x::NTuple{3,T}) where {T} - y = r = e = zeros(T, 3) - j = 1 - e[1] = x[1] - for i = 1:1 - r[i], t = two_sum(e[i], x[i+1]) - if t !== zero(T) - y[j] = r[i] - e[i+1] = t - j += 1 - else - e[i+1] = r[i] - end - end - y[j], y[j+1] = two_sum(e[3], x[4]) - return y -end - -function fast_vecsum_errbranch(x1::T,x2::T,x3::T) where {T} - y = zeros(T, 3) - j = 1 - # e[1] = x1 - # i = 1 - r, t = two_sum(x1, x2) - if t !== zero(T) - y[j] = r - e = t - j += 1 - else - e = r - end - - y[j], y[j+1] = two_sum(e, x3) - return y -end - -function tripleword(x1::T, x2::T, x3::T) where {T} - a1, a2 = two_sum(x1, x2) - c1, c2 = two_sum(a1, x3) - e1to3 = vec_sum(c1,c2,a2) - y = vecsum_errbranch(e1to3) - return (y...,) -end - -@inline function fast_tripleword(x1::T, x2::T, x3::T) where {T} - a,b,c,d = maxtomin(x1,x2,x3) - return fast_vecsum_errbranch(a,b,c) -end diff --git a/src/math/arithmetic/modpi.jl b/src/math/arithmetic/modpi.jl index 2f252d43..7bf12ee3 100644 --- a/src/math/arithmetic/modpi.jl +++ b/src/math/arithmetic/modpi.jl @@ -94,68 +94,6 @@ function rem2pi(x::DoubleFloat{T}) where {T<:IEEEFloat} return HI(x) < 0 ? -m : m end -function rem1pi(x::DoubleFloat{T}) where {T<:IEEEFloat} - m = mod1pi(x) - return HI(x) < 0 ? -m : m -end - -function remhalfpi(x::DoubleFloat{T}) where {T<:IEEEFloat} - m = modhalfpi(x) - return HI(x) < 0 ? -m : m -end - -function remqrtrpi(x::DoubleFloat{T}) where {T<:IEEEFloat} - m = modqrtrpi(x) - return HI(x) < 0 ? -m : m -end - - - -""" - negpi_pospi(x) - - x --> [-pi..+pi) -""" -function negpi_pospi(x::DoubleFloat{T}) where {T<:IEEEFloat} - result = mod2pi(x) - if result >= DoubleFloat{T}(pi) - result = result - 2*DoubleFloat{T}(pi) - end - return result -end - - - -# in -pi..+pi -# determine nearest multiple of pi/2 -# within (pi/2) quadrant determine nearest multiple of pi/16 - -function whichquadrant(x::DoubleFloat{T}) where {T<:IEEEFloat} - x = negpi_pospi(x) - if signbit(x) # quadrant -2 or -1 - quadrant = -x < DoubleFloat{T}(pi)/2 ? -2 : -1 - else # quadrant 1 or 2 - quadrant = x < DoubleFloat{T}(pi)/2 ? 1 : 2 - end - return quadrant -end - -function which64th(x::DoubleFloat{Float64}, quadrant::Int) - y = T(mul232(HILO(x), nv_pi_1o2_t64)) - z = trunc(Int, y*4) - return z -end -function which64th(x::DoubleFloat{Float32}, quadrant::Int) - y = T(mul232(HILO(x), nv_pi_1o2_t32)) - z = trunc(Int, y*4) - return z -end -function which64th(x::DoubleFloat{Float16}, quadrant::Int) - y = T(mul232(HILO(x), nv_pi_1o2_t16)) - z = trunc(Int, y*4) - return z -end - # ========================================== ^^^^^ ========================================= const pi_1o1_t64_hi = pi_1o1_t64[1] @@ -166,16 +104,6 @@ const pi_1o4_t64_hi = pi_1o4_t64[1] const pi_1o4_t64_md = pi_1o4_t64[2] const pi_1o4_t64_lo = pi_1o4_t64[3] -function value_minus_pi(x::DoubleFloat{Float64}) - hi, lo = sub232(HI(x), LO(x), pi_1o1_t64_hi, pi_1o1_t64_md, pi_1o1_t64_lo) - return DoubleFloat{Float64}(hi, lo) -end - -function value_minus_qrtrpi(x::DoubleFloat{Float64}) - hi, lo = sub232(HI(x), LO(x), pi_1o4_t64_hi, pi_1o4_t64_md, pi_1o4_t64_lo) - return DoubleFloat{Float64}(hi, lo) -end - function mod1pipm(x::DoubleFloat{Float64}) abs(x) < onepi_d64 && return x w1 = mul323(inv_pi_1o1_t64, HILO(x)) @@ -188,32 +116,6 @@ end mod1pipm(x::DoubleFloat{Float32}) = DoubleFloat{Float32}(mod1pipm(DoubleFloat{Float64}(x))) mod1pipm(x::DoubleFloat{Float16}) = DoubleFloat{Float16}(mod1pipm(DoubleFloat{Float64}(x))) -function modhalfpipm(x::DoubleFloat{Float64}) - abs(x) < halfpi_d64 && return x - w1 = mul323(inv_pi_1o2_t64, HILO(x)) - w2 = two_sum(w1[1] - trunc(w1[1]), w1[2], w1[3]) - y = mul322(pi_1o2_t64, w2) - z = Double64(y) - return z -end - -modhalfpipm(x::DoubleFloat{Float32}) = DoubleFloat{Float32}(modhalfpipm(DoubleFloat{Float64}(x))) -modhalfpipm(x::DoubleFloat{Float16}) = DoubleFloat{Float16}(modhalfpipm(DoubleFloat{Float64}(x))) - -function modqrtrpipm(x::DoubleFloat{Float64}) - abs(x) < qrtrpi_d64 && return x - w1 = mul323(inv_pi_1o4_t64, HILO(x)) - w2 = two_sum(w1[1] - trunc(w1[1]), w1[2], w1[3]) - y = mul322(pi_1o4_t64, w2) - z = Double64(y) - return z -end - -modqrtrpipm(x::DoubleFloat{Float32}) = DoubleFloat{Float32}(modqrtrpipm(DoubleFloat{Float64}(x))) -modqrtrpipm(x::DoubleFloat{Float16}) = DoubleFloat{Float16}(modqrtrpipm(DoubleFloat{Float64}(x))) - - - #= rem2pi(x) = x - 2π*round(x/(2π),r) rem2pi(x, RoundDown) == mod2pi(x) @@ -235,40 +137,3 @@ rem2pi(x::DoubleFloat{Float32}, rounding::RoundingMode) = DoubleFloat{Float32}(rem2pi(DoubleFloat{Float64}(x), rounding)) rem2pi(x::DoubleFloat{Float16}, rounding::RoundingMode) = DoubleFloat{Float16}(rem2pi(DoubleFloat{Float64}(x), rounding)) - - -rem1pi(x::DoubleFloat{Float64}, rounding::RoundingMode{:Down}) = mod1pi(x) -rem1pi(x::DoubleFloat{Float64}, rounding::RoundingMode{:Up}) = -rem1pi(-x, RoundDown) -rem1pi(x::DoubleFloat{Float64}, rounding::RoundingMode{:Nearest}) = modhalfpipm(x) -rem1pi(x::DoubleFloat{Float64}, rounding::RoundingMode{:ToZero}) = - signbit(x) ? rem1pi(x, RoundUp) : rem1pi(x, RoundDown) - -rem1pi(x::DoubleFloat{Float32}, rounding::RoundingMode) = - DoubleFloat{Float32}(rem1pi(DoubleFloat{Float64}(x), rounding)) -rem1pi(x::DoubleFloat{Float16}, rounding::RoundingMode) = - DoubleFloat{Float16}(rem1pi(DoubleFloat{Float64}(x), rounding)) - - -#= -remhalfpi(x::DoubleFloat{Float64}, rounding::RoundingMode{:Down}) = modhalfpi(x) -remhalfpi(x::DoubleFloat{Float64}, rounding::RoundingMode{:Up}) = -remhalfpi(-x, RoundDown) -remhalfpi(x::DoubleFloat{Float64}, rounding::RoundingMode{:Nearest}) = modqrtrpipm(x) -remhalfpi(x::DoubleFloat{Float64}, rounding::RoundingMode{:ToZero}) = - signbit(x) ? remhalfpi(x, RoundUp) : remhalfpi(x, RoundDown) - -remhalfpi(x::DoubleFloat{Float32}, rounding::RoundingMode) = - DoubleFloat{Float32}(remhalfpi(DoubleFloat{Float64}(x), rounding)) -remhalfpi(x::DoubleFloat{Float16}, rounding::RoundingMode) = - DoubleFloat{Float16}(remhalfpi(DoubleFloat{Float64}(x), rounding)) - -remqrtrpi(x::DoubleFloat{Float64}, rounding::RoundingMode{:Down}) = modqrtrpi(x) -remqrtrpi(x::DoubleFloat{Float64}, rounding::RoundingMode{:Up}) = -remqrtrpi(-x, RoundDown) -# remqrtrpi(x::DoubleFloat{Float64}, rounding::RoundingMode{:Nearest}) = modeighthpipm(x) -remqrtrpi(x::DoubleFloat{Float64}, rounding::RoundingMode{:ToZero}) = - signbit(x) ? remqrtrpi(x, RoundUp) : remqrtrpi(x, RoundDown) - -remqrtrpi(x::DoubleFloat{Float32}, rounding::RoundingMode) = - DoubleFloat{Float32}(remqrtpi(DoubleFloat{Float64}(x), rounding)) -remqrtrpi(x::DoubleFloat{Float16}, rounding::RoundingMode) = -DoubleFloat{Float16}(remqrtrpi(DoubleFloat{Float64}(x), rounding)) -=# diff --git a/test/arithmetic.jl b/test/arithmetic.jl index e11030b1..d4e232b5 100644 --- a/test/arithmetic.jl +++ b/test/arithmetic.jl @@ -78,15 +78,8 @@ end end @testset "rempi" begin - @test iszero(DoubleFloats.rem1pi(Double64(pi))) @test iszero(DoubleFloats.rem2pi(2*Double64(pi))) - @test iszero(DoubleFloats.remhalfpi(0.5*Double64(pi))) - @test iszero(DoubleFloats.remqrtrpi(0.25*Double64(pi))) - - @test iszero(DoubleFloats.rem1pi(Double32(pi))) @test iszero(DoubleFloats.rem2pi(2*Double32(pi))) - @test iszero(DoubleFloats.remhalfpi(0.5*Double32(pi))) - @test iszero(DoubleFloats.remqrtrpi(0.25*Double32(pi))) end @testset "hypot" begin diff --git a/test/modrem.jl b/test/modrem.jl index 4e99355f..b5610183 100644 --- a/test/modrem.jl +++ b/test/modrem.jl @@ -12,7 +12,5 @@ end x=cbrt(41)*sqrt(Double64(pi)) @test DoubleFloats.rem2pi(x) == Double64(6.111805926475162, -1.667563077572613e-16) - @test DoubleFloats.rem1pi(x) == Double64(2.9702132728853683, 1.54868222178066e-16) @test DoubleFloats.rem2pi(-x) == Double64(-0.1713793807044248, 4.647966647701768e-18) - @test DoubleFloats.rem1pi(-x) == Double64(2.9702132728853683, 1.54868222178066e-16) end