Base: make constructing a float from a rational exact

nsajko · nsajko · commit d2b5d00f36ed · 2024-01-13T19:13:00.000+01:00
Implementation approach: 1. Convert the (numerator, denominator) pair to a (sign bit, integral significand, exponent) triplet using integer arithmetic. The integer type in question must be wide enough. 2. Convert the above triplet into an instance of the chosen FP type. There is special support for IEEE 754 floating-point and for `BigFloat`, otherwise a fallback using `ldexp` is used. As a bonus, constructing a `BigFloat` from a `Rational` should now be thread-safe when the rounding mode and precision are provided to the constructor, because there is no access to the global precision or rounding mode settings. Updates JuliaLang#45213 Updates JuliaLang#50940 Updates JuliaLang#52507 Fixes JuliaLang#52394 Closes JuliaLang#52395 Fixes JuliaLang#52859
diff --git a/base/Base.jl b/base/Base.jl
@@ -253,6 +253,7 @@ include("rounding.jl")
 include("div.jl")
 include("rawbigints.jl")
 include("float.jl")
+include("rational_to_float.jl")
 include("twiceprecision.jl")
 include("complex.jl")
 include("rational.jl")
diff --git a/base/mpfr.jl b/base/mpfr.jl
@@ -19,7 +19,8 @@ import
         isone, big, _string_n, decompose, minmax,
         sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand,
         uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask,
-        RawBigIntRoundingIncrementHelper, truncated, RawBigInt
+        RawBigIntRoundingIncrementHelper, truncated, RawBigInt, unsafe_rational,
+        RationalToFloat, rational_to_floating_point
 
 
 using .Base.Libc
@@ -310,12 +311,51 @@ BigFloat(x::Union{UInt8,UInt16,UInt32}, r::MPFRRoundingMode=ROUNDING_MODE[]; pre
 BigFloat(x::Union{Float16,Float32}, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) =
     BigFloat(Float64(x), r; precision=precision)
 
-function BigFloat(x::Rational, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[])
-    setprecision(BigFloat, precision) do
-        setrounding_raw(BigFloat, r) do
-            BigFloat(numerator(x))::BigFloat / BigFloat(denominator(x))::BigFloat
+function set_2exp!(z::BigFloat, n::BigInt, exp::Int, rm::MPFRRoundingMode)
+    ccall(
+        (:mpfr_set_z_2exp, libmpfr),
+        Int32,
+        (Ref{BigFloat}, Ref{BigInt}, Int, MPFRRoundingMode),
+        z, n, exp, rm,
+    )
+    nothing
+end
+
+function RationalToFloat.rational_to_floating_point_impl(::Type{BigFloat}, ::Type{BigInt}, num, den, romo, prec)
+    num_is_zero = iszero(num)
+    den_is_zero = iszero(den)
+    s = Int8(sign(num))
+    sb = signbit(s)
+    is_zero = num_is_zero & !den_is_zero
+    is_inf = !num_is_zero & den_is_zero
+    is_regular = !num_is_zero & !den_is_zero
+
+    if is_regular
+        let rtfc = RationalToFloat.rational_to_float_components
+            c = rtfc(BigInt, num, den, prec, nothing, romo, sb)
+            ret = BigFloat(precision = prec)
+            mpfr_romo = convert(MPFRRoundingMode, romo)
+            set_2exp!(ret, s * c.integral_significand, Int(c.exponent - prec + true), mpfr_romo)
+            ret
         end
-    end
+    else
+        if is_zero
+            BigFloat(false, MPFRRoundToZero, precision = prec)
+        elseif is_inf
+            BigFloat(s * Inf, MPFRRoundToZero, precision = prec)
+        else
+            BigFloat(precision = prec)
+        end
+    end::BigFloat
+end
+
+function BigFloat(x::Rational, r::RoundingMode; precision::Integer = DEFAULT_PRECISION[])
+    rational_to_floating_point(BigFloat, x, r, precision)
+end
+
+function BigFloat(x::Rational, r::MPFRRoundingMode = ROUNDING_MODE[];
+                  precision::Integer = DEFAULT_PRECISION[])
+    rational_to_floating_point(BigFloat, x, r, precision)
 end
 
 function tryparse(::Type{BigFloat}, s::AbstractString; base::Integer=0, precision::Integer=DEFAULT_PRECISION[], rounding::MPFRRoundingMode=ROUNDING_MODE[])
diff --git a/base/rational.jl b/base/rational.jl
@@ -140,10 +140,21 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
 (::Type{T})(x::Rational) where {T<:Integer} = (isinteger(x) ? convert(T, x.num)::T :
     throw(InexactError(nameof(T), T, x)))
 
-AbstractFloat(x::Rational) = (float(x.num)/float(x.den))::AbstractFloat
-function (::Type{T})(x::Rational{S}) where T<:AbstractFloat where S
-    P = promote_type(T,S)
-    convert(T, convert(P,x.num)/convert(P,x.den))::T
+function numerator_denominator_promoted(x)
+    y = unsafe_rational(numerator(x), denominator(x))
+    (numerator(y), denominator(y))
+end
+
+function rational_to_floating_point(::Type{F}, x, rm = RoundNearest, prec = precision(F)) where {F}
+    nd = numerator_denominator_promoted(x)
+    RationalToFloat.rational_to_floating_point(F, nd..., rm, prec)::F
+end
+
+(::Type{F})(x::Rational) where {F<:AbstractFloat} = rational_to_floating_point(F, x)::F
+
+function AbstractFloat(x::Q) where {Q<:Rational}
+    T = float(Q)
+    T(x)::T::AbstractFloat
 end
 
 function Rational{T}(x::AbstractFloat) where T<:Integer
diff --git a/base/rational_to_float.jl b/base/rational_to_float.jl
@@ -0,0 +1,246 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module RationalToFloat
+
+# bit width
+const bw = Base.top_set_bit
+
+const Rnd = Base.Rounding
+
+# Performance optimization. Unlike raw `<<` or `>>>`, this is supposed
+# to compile to a single instruction, because the semantics correspond
+# to what hardware usually provides.
+function machine_shift(shift::S, a::T, b) where {S,T<:Base.BitInteger}
+    @inline begin
+        mask = 8*sizeof(T) - 1
+        c = b & mask
+        shift(a, c)
+    end
+end
+
+machine_shift(::S, a::Bool, ::Any) where {S} = error("unsupported")
+
+# Fallback for `BigInt` etc.
+machine_shift(shift::S, a, b) where {S} = shift(a, b)
+
+# Arguments are positive integers.
+function div_significand_with_remainder(num, den, minimum_significand_size)
+    clamped = x -> max(zero(x), x)::typeof(x)
+    shift = clamped(minimum_significand_size + bw(den) - bw(num) + 0x2)
+    t = machine_shift(<<, num, shift)
+    (divrem(t, den, RoundToZero)..., shift)
+end
+
+# `divrem(n, 1<<k, RoundToZero)`
+function divrem_2(n, k)
+    quo = machine_shift(>>>, n,   k)
+    tmp = machine_shift(<<,  quo, k)
+    rem = n - tmp
+    (quo, rem)
+end
+
+# `divrem_2(n, 1)`
+function divrem_2(n)
+    quo = n >>> true
+    rem = n % Bool
+    (quo, rem)
+end
+
+function rational_to_float_components_impl0(num, den, precision, max_subnormal_exp)
+    # `+1` because we need an extra, "round", bit for some rounding modes.
+    #
+    # TODO: as a performance optimization, only do this when required
+    #       by the rounding mode
+    prec_p_1 = precision + true
+
+    (quo0, rem0, shift) = div_significand_with_remainder(num, den, prec_p_1)
+
+    width = bw(quo0)
+    excess_width = width - prec_p_1
+    exp = width - shift - true
+
+    exp_underflow = if isnothing(max_subnormal_exp)
+        zero(exp)
+    else
+        let d = max_subnormal_exp - exp, T = typeof(d), z = zero(d)::T
+            (signbit(d) ? z : d + true)::T
+        end
+    end
+
+    (quo1, rem1) = divrem_2(quo0, exp_underflow + excess_width)
+    (quo2, rem2) = divrem_2(quo1)
+
+    round_bit = rem2
+    sticky_bit = !iszero(rem1) | !iszero(rem0)
+
+    (; integral_significand = quo2, exponent = exp, round_bit, sticky_bit)
+end
+
+struct RoundingIncrementHelper
+    final_bit::Bool
+    round_bit::Bool
+    sticky_bit::Bool
+end
+
+(h::RoundingIncrementHelper)(::Rnd.FinalBit) = h.final_bit
+(h::RoundingIncrementHelper)(::Rnd.RoundBit) = h.round_bit
+(h::RoundingIncrementHelper)(::Rnd.StickyBit) = h.sticky_bit
+
+function rational_to_float_components_impl(num, den, precision, max_subnormal_exp, romo, sign_bit)
+    # Helps ensure type stability
+    fun = function(f::F, v) where {F}
+        r = f(v)
+        T = typeof(r)
+        (T(v)::T, r)::NTuple{2,T}
+    end
+
+    fun_sig = x -> x >>> true
+    fun_exp = x -> x + true
+    overflows = (x, p) -> x == machine_shift(<<, one(x), p)
+
+    t = rational_to_float_components_impl0(num, den, precision, max_subnormal_exp)
+    raw_significand = t.integral_significand
+    rh = RoundingIncrementHelper(raw_significand % Bool, t.round_bit, t.sticky_bit)
+    incr = Rnd.correct_rounding_requires_increment(rh, romo, sign_bit)
+    rounded = raw_significand + incr
+    (integral_significand, exponent) = let exp = t.exponent, s01, e01, se0, se1, T
+        s01 = fun(fun_sig, rounded)
+        e01 = fun(fun_exp, exp)
+        se0 = (first(s01), first(e01))
+        se1 = ( last(s01),  last(e01))
+        T = typeof(se0)
+        (overflows(rounded, precision) ? se1 : se0)::T
+    end
+    (; integral_significand, exponent)
+end
+
+function rational_to_float_components(::Type{T}, num, den, precision, max_subnormal_exp, romo, sb) where {T}
+    rational_to_float_components_impl(abs(T(num)), den, precision, max_subnormal_exp, romo, sb)
+end
+
+function rational_to_floating_point_fallback(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S}
+    num_is_zero = iszero(num)
+    den_is_zero = iszero(den)
+    sb = signbit(num)
+    is_zero = num_is_zero & !den_is_zero
+    is_inf = !num_is_zero & den_is_zero
+    is_regular = !num_is_zero & !den_is_zero
+    if is_regular
+        let
+            c = rational_to_float_components(S, num, den, prec, nothing, rm, sb)
+            exp = c.exponent
+            signif = T(c.integral_significand)::T
+            let x = ldexp(signif, exp - prec + true)::T
+                sb ? -x : x
+            end::T
+        end
+    else
+        if is_zero
+            zero(T)::T
+        elseif is_inf
+            T(Inf)::T
+        else
+            T(NaN)::T
+        end
+    end::T
+end
+
+function rational_to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S}
+    rational_to_floating_point_fallback(T, S, num, den, rm, prec)
+end
+
+function rational_to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T<:Base.IEEEFloat,S}
+    num_is_zero = iszero(num)
+    den_is_zero = iszero(den)
+    sb = signbit(num)
+    is_zero = num_is_zero & !den_is_zero
+    is_inf = !num_is_zero & den_is_zero
+    is_regular = !num_is_zero & !den_is_zero
+    (rm_is_to_zero, rm_is_from_zero) = if Rnd.rounds_to_nearest(rm)
+        (false, false)
+    else
+        let from = Rnd.rounds_away_from_zero(rm, sb)
+            (!from, from)
+        end
+    end::NTuple{2,Bool}
+    exp_max = Base.exponent_max(T)
+    exp_min = Base.exponent_min(T)
+    ieee_repr = Base.ieee754_representation
+    repr_zero = ieee_repr(T, sb, Val(:zero))
+    repr_inf  = ieee_repr(T, sb, Val(:inf))
+    repr_nan  = ieee_repr(T, sb, Val(:nan))
+    U = typeof(repr_zero)
+    repr_zero::U
+    repr_inf::U
+    repr_nan::U
+
+    ret_u = if is_regular
+        let
+            c = let e = exp_min - 1
+                rational_to_float_components(S, num, den, prec, e, rm, sb)
+            end
+            exp = c.exponent
+            exp_diff = exp - exp_min
+            is_normal = 0 ≤ exp_diff
+            exp_is_huge_p = exp_max < exp
+            exp_is_huge_n = signbit(exp_diff + prec)
+            rounds_to_inf  = exp_is_huge_p & !rm_is_to_zero
+            rounds_to_zero = exp_is_huge_n & !rm_is_from_zero
+
+            if !rounds_to_zero & !exp_is_huge_p
+                let signif = (c.integral_significand % U) & Base.significand_mask(T)
+                    exp_field = (max(exp_diff, zero(exp_diff)) + is_normal) % U
+                    ieee_repr(T, sb, exp_field, signif)::U
+                end
+            elseif rounds_to_zero
+                repr_zero
+            elseif rounds_to_inf
+                repr_inf
+            else
+                ieee_repr(T, sb, Val(:omega))
+            end
+        end
+    else
+        if is_zero
+            repr_zero
+        elseif is_inf
+            repr_inf
+        else
+            repr_nan
+        end
+    end::U
+
+    reinterpret(T, ret_u)::T
+end
+
+# `BigInt` is a safe default.
+rational_to_float_promote_type(::Type{F}, ::Type{S}) where {F,S} = BigInt
+
+const BitUnsignedOrBool = Union{Bool,Base.BitUnsigned}
+const BitIntegerOrBool  = Union{Bool,Base.BitInteger}
+const BitSigned = Base.BitSigned
+
+integer_type_with_size(::Type{<:BitUnsignedOrBool}, ::Val{1})  = UInt16
+integer_type_with_size(::Type{<:BitUnsignedOrBool}, ::Val{2})  = UInt16
+integer_type_with_size(::Type{<:BitUnsignedOrBool}, ::Val{4})  = UInt32
+integer_type_with_size(::Type{<:BitUnsignedOrBool}, ::Val{8})  = UInt64
+integer_type_with_size(::Type{<:BitUnsignedOrBool}, ::Val{16}) = UInt128
+integer_type_with_size(::Type{<:BitSigned},         ::Val{1})  =  Int16
+integer_type_with_size(::Type{<:BitSigned},         ::Val{2})  =  Int16
+integer_type_with_size(::Type{<:BitSigned},         ::Val{4})  =  Int32
+integer_type_with_size(::Type{<:BitSigned},         ::Val{8})  =  Int64
+integer_type_with_size(::Type{<:BitSigned},         ::Val{16}) =  Int128
+
+# As an optimization, use an integer type narrower than `BigInt` when possible.
+function rational_to_float_promote_type(::Type{F}, ::Type{S}) where {F<:Base.IEEEFloat,S<:BitIntegerOrBool}
+    m = max(sizeof(F), sizeof(S))::Int
+    Max = integer_type_with_size(S, Val(m))
+    widen(Max)
+end
+
+function rational_to_floating_point(::Type{F}, num::T, den::T, rm, prec) where {F,T}
+    S = rational_to_float_promote_type(F, T)
+    rational_to_floating_point_impl(F, S, num, den, rm, prec)
+end
+
+end
diff --git a/test/choosetests.jl b/test/choosetests.jl
@@ -11,7 +11,7 @@ const TESTNAMES = [
         "char", "strings", "triplequote", "unicode", "intrinsics",
         "dict", "hashing", "iobuffer", "staged", "offsetarray",
         "arrayops", "tuple", "reduce", "reducedim", "abstractarray",
-        "intfuncs", "simdloop", "vecelement", "rational",
+        "intfuncs", "simdloop", "vecelement", "rational", "rational_to_float",
         "bitarray", "copy", "math", "fastmath", "functional", "iterators",
         "operators", "ordering", "path", "ccall", "parse", "loading", "gmp",
         "sorting", "spawn", "backtrace", "exceptions",
diff --git a/test/rational.jl b/test/rational.jl
diff --git a/test/rational_to_float.jl b/test/rational_to_float.jl