|
| 1 | +# This file is a part of Julia. License is MIT: https://julialang.org/license |
| 2 | + |
| 3 | +module RationalToFloat |
| 4 | + |
| 5 | +# bit width |
| 6 | +const bw = Base.top_set_bit |
| 7 | + |
| 8 | +const Rnd = Base.Rounding |
| 9 | + |
| 10 | +# Performance optimization. Unlike raw `<<` or `>>>`, this is supposed |
| 11 | +# to compile to a single instruction, because the semantics correspond |
| 12 | +# to what hardware usually provides. |
| 13 | +function machine_shift(shift::S, a::T, b) where {S,T<:Base.BitInteger} |
| 14 | + @inline begin |
| 15 | + mask = 8*sizeof(T) - 1 |
| 16 | + c = b & mask |
| 17 | + shift(a, c) |
| 18 | + end |
| 19 | +end |
| 20 | + |
| 21 | +machine_shift(::S, a::Bool, ::Any) where {S} = error("unsupported") |
| 22 | + |
| 23 | +# Fallback for `BigInt` etc. |
| 24 | +machine_shift(shift::S, a, b) where {S} = shift(a, b) |
| 25 | + |
| 26 | +# Arguments are positive integers. |
| 27 | +function div_significand_with_remainder(num, den, minimum_significand_size) |
| 28 | + clamped = x -> max(zero(x), x)::typeof(x) |
| 29 | + shift = clamped(minimum_significand_size + bw(den) - bw(num) + 0x2) |
| 30 | + t = machine_shift(<<, num, shift) |
| 31 | + (divrem(t, den, RoundToZero)..., shift) |
| 32 | +end |
| 33 | + |
| 34 | +# `divrem(n, 1<<k, RoundToZero)` |
| 35 | +function divrem_2(n, k) |
| 36 | + quo = machine_shift(>>>, n, k) |
| 37 | + tmp = machine_shift(<<, quo, k) |
| 38 | + rem = n - tmp |
| 39 | + (quo, rem) |
| 40 | +end |
| 41 | + |
| 42 | +# `divrem_2(n, 1)` |
| 43 | +function divrem_2(n) |
| 44 | + quo = n >>> true |
| 45 | + rem = n % Bool |
| 46 | + (quo, rem) |
| 47 | +end |
| 48 | + |
| 49 | +function rational_to_float_components_impl0(num, den, precision, max_subnormal_exp) |
| 50 | + # `+1` because we need an extra, "round", bit for some rounding modes. |
| 51 | + # |
| 52 | + # TODO: as a performance optimization, only do this when required |
| 53 | + # by the rounding mode |
| 54 | + prec_p_1 = precision + true |
| 55 | + |
| 56 | + (quo0, rem0, shift) = div_significand_with_remainder(num, den, prec_p_1) |
| 57 | + |
| 58 | + width = bw(quo0) |
| 59 | + excess_width = width - prec_p_1 |
| 60 | + exp = width - shift - true |
| 61 | + |
| 62 | + exp_underflow = if isnothing(max_subnormal_exp) |
| 63 | + zero(exp) |
| 64 | + else |
| 65 | + let d = max_subnormal_exp - exp, T = typeof(d), z = zero(d)::T |
| 66 | + (signbit(d) ? z : d + true)::T |
| 67 | + end |
| 68 | + end |
| 69 | + |
| 70 | + (quo1, rem1) = divrem_2(quo0, exp_underflow + excess_width) |
| 71 | + (quo2, rem2) = divrem_2(quo1) |
| 72 | + |
| 73 | + round_bit = rem2 |
| 74 | + sticky_bit = !iszero(rem1) | !iszero(rem0) |
| 75 | + |
| 76 | + (; integral_significand = quo2, exponent = exp, round_bit, sticky_bit) |
| 77 | +end |
| 78 | + |
| 79 | +struct RoundingIncrementHelper |
| 80 | + final_bit::Bool |
| 81 | + round_bit::Bool |
| 82 | + sticky_bit::Bool |
| 83 | +end |
| 84 | + |
| 85 | +(h::RoundingIncrementHelper)(::Rnd.FinalBit) = h.final_bit |
| 86 | +(h::RoundingIncrementHelper)(::Rnd.RoundBit) = h.round_bit |
| 87 | +(h::RoundingIncrementHelper)(::Rnd.StickyBit) = h.sticky_bit |
| 88 | + |
| 89 | +function rational_to_float_components_impl(num, den, precision, max_subnormal_exp, romo, sign_bit) |
| 90 | + # Helps ensure type stability |
| 91 | + fun = function(f::F, v) where {F} |
| 92 | + r = f(v) |
| 93 | + T = typeof(r) |
| 94 | + (T(v)::T, r)::NTuple{2,T} |
| 95 | + end |
| 96 | + |
| 97 | + fun_sig = x -> x >>> true |
| 98 | + fun_exp = x -> x + true |
| 99 | + overflows = (x, p) -> x == machine_shift(<<, one(x), p) |
| 100 | + |
| 101 | + t = rational_to_float_components_impl0(num, den, precision, max_subnormal_exp) |
| 102 | + raw_significand = t.integral_significand |
| 103 | + rh = RoundingIncrementHelper(raw_significand % Bool, t.round_bit, t.sticky_bit) |
| 104 | + incr = Rnd.correct_rounding_requires_increment(rh, romo, sign_bit) |
| 105 | + rounded = raw_significand + incr |
| 106 | + (integral_significand, exponent) = let exp = t.exponent, s01, e01, se0, se1, T |
| 107 | + s01 = fun(fun_sig, rounded) |
| 108 | + e01 = fun(fun_exp, exp) |
| 109 | + se0 = (first(s01), first(e01)) |
| 110 | + se1 = ( last(s01), last(e01)) |
| 111 | + T = typeof(se0) |
| 112 | + (overflows(rounded, precision) ? se1 : se0)::T |
| 113 | + end |
| 114 | + (; integral_significand, exponent) |
| 115 | +end |
| 116 | + |
| 117 | +function rational_to_float_components(::Type{T}, num, den, precision, max_subnormal_exp, romo, sb) where {T} |
| 118 | + rational_to_float_components_impl(abs(T(num)), den, precision, max_subnormal_exp, romo, sb) |
| 119 | +end |
| 120 | + |
| 121 | +function rational_to_floating_point_fallback(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S} |
| 122 | + num_is_zero = iszero(num) |
| 123 | + den_is_zero = iszero(den) |
| 124 | + sb = signbit(num) |
| 125 | + is_zero = num_is_zero & !den_is_zero |
| 126 | + is_inf = !num_is_zero & den_is_zero |
| 127 | + is_regular = !num_is_zero & !den_is_zero |
| 128 | + if is_regular |
| 129 | + let |
| 130 | + c = rational_to_float_components(S, num, den, prec, nothing, rm, sb) |
| 131 | + exp = c.exponent |
| 132 | + signif = T(c.integral_significand)::T |
| 133 | + let x = ldexp(signif, exp - prec + true)::T |
| 134 | + sb ? -x : x |
| 135 | + end::T |
| 136 | + end |
| 137 | + else |
| 138 | + if is_zero |
| 139 | + zero(T)::T |
| 140 | + elseif is_inf |
| 141 | + T(Inf)::T |
| 142 | + else |
| 143 | + T(NaN)::T |
| 144 | + end |
| 145 | + end::T |
| 146 | +end |
| 147 | + |
| 148 | +function rational_to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S} |
| 149 | + rational_to_floating_point_fallback(T, S, num, den, rm, prec) |
| 150 | +end |
| 151 | + |
| 152 | +function rational_to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T<:Base.IEEEFloat,S} |
| 153 | + num_is_zero = iszero(num) |
| 154 | + den_is_zero = iszero(den) |
| 155 | + sb = signbit(num) |
| 156 | + is_zero = num_is_zero & !den_is_zero |
| 157 | + is_inf = !num_is_zero & den_is_zero |
| 158 | + is_regular = !num_is_zero & !den_is_zero |
| 159 | + (rm_is_to_zero, rm_is_from_zero) = if Rnd.rounds_to_nearest(rm) |
| 160 | + (false, false) |
| 161 | + else |
| 162 | + let from = Rnd.rounds_away_from_zero(rm, sb) |
| 163 | + (!from, from) |
| 164 | + end |
| 165 | + end::NTuple{2,Bool} |
| 166 | + exp_max = Base.exponent_max(T) |
| 167 | + exp_min = Base.exponent_min(T) |
| 168 | + ieee_repr = Base.ieee754_representation |
| 169 | + repr_zero = ieee_repr(T, sb, Val(:zero)) |
| 170 | + repr_inf = ieee_repr(T, sb, Val(:inf)) |
| 171 | + repr_nan = ieee_repr(T, sb, Val(:nan)) |
| 172 | + U = typeof(repr_zero) |
| 173 | + repr_zero::U |
| 174 | + repr_inf::U |
| 175 | + repr_nan::U |
| 176 | + |
| 177 | + ret_u = if is_regular |
| 178 | + let |
| 179 | + c = let e = exp_min - 1 |
| 180 | + rational_to_float_components(S, num, den, prec, e, rm, sb) |
| 181 | + end |
| 182 | + exp = c.exponent |
| 183 | + exp_diff = exp - exp_min |
| 184 | + is_normal = 0 ≤ exp_diff |
| 185 | + exp_is_huge_p = exp_max < exp |
| 186 | + exp_is_huge_n = signbit(exp_diff + prec) |
| 187 | + rounds_to_inf = exp_is_huge_p & !rm_is_to_zero |
| 188 | + rounds_to_zero = exp_is_huge_n & !rm_is_from_zero |
| 189 | + |
| 190 | + if !rounds_to_zero & !exp_is_huge_p |
| 191 | + let signif = (c.integral_significand % U) & Base.significand_mask(T) |
| 192 | + exp_field = (max(exp_diff, zero(exp_diff)) + is_normal) % U |
| 193 | + ieee_repr(T, sb, exp_field, signif)::U |
| 194 | + end |
| 195 | + elseif rounds_to_zero |
| 196 | + repr_zero |
| 197 | + elseif rounds_to_inf |
| 198 | + repr_inf |
| 199 | + else |
| 200 | + ieee_repr(T, sb, Val(:omega)) |
| 201 | + end |
| 202 | + end |
| 203 | + else |
| 204 | + if is_zero |
| 205 | + repr_zero |
| 206 | + elseif is_inf |
| 207 | + repr_inf |
| 208 | + else |
| 209 | + repr_nan |
| 210 | + end |
| 211 | + end::U |
| 212 | + |
| 213 | + reinterpret(T, ret_u)::T |
| 214 | +end |
| 215 | + |
| 216 | +# `BigInt` is a safe default. |
| 217 | +rational_to_float_promote_type(::Type{F}, ::Type{S}) where {F,S} = BigInt |
| 218 | + |
| 219 | +const BitUnsignedOrBool = Union{Bool,Base.BitUnsigned} |
| 220 | +const BitIntegerOrBool = Union{Bool,Base.BitInteger} |
| 221 | +const BitSigned = Base.BitSigned |
| 222 | + |
| 223 | +integer_type_with_size(::Type{<:BitUnsignedOrBool}, ::Val{1}) = UInt16 |
| 224 | +integer_type_with_size(::Type{<:BitUnsignedOrBool}, ::Val{2}) = UInt16 |
| 225 | +integer_type_with_size(::Type{<:BitUnsignedOrBool}, ::Val{4}) = UInt32 |
| 226 | +integer_type_with_size(::Type{<:BitUnsignedOrBool}, ::Val{8}) = UInt64 |
| 227 | +integer_type_with_size(::Type{<:BitUnsignedOrBool}, ::Val{16}) = UInt128 |
| 228 | +integer_type_with_size(::Type{<:BitSigned}, ::Val{1}) = Int16 |
| 229 | +integer_type_with_size(::Type{<:BitSigned}, ::Val{2}) = Int16 |
| 230 | +integer_type_with_size(::Type{<:BitSigned}, ::Val{4}) = Int32 |
| 231 | +integer_type_with_size(::Type{<:BitSigned}, ::Val{8}) = Int64 |
| 232 | +integer_type_with_size(::Type{<:BitSigned}, ::Val{16}) = Int128 |
| 233 | + |
| 234 | +# As an optimization, use an integer type narrower than `BigInt` when possible. |
| 235 | +function rational_to_float_promote_type(::Type{F}, ::Type{S}) where {F<:Base.IEEEFloat,S<:BitIntegerOrBool} |
| 236 | + m = max(sizeof(F), sizeof(S))::Int |
| 237 | + Max = integer_type_with_size(S, Val(m)) |
| 238 | + widen(Max) |
| 239 | +end |
| 240 | + |
| 241 | +function rational_to_floating_point(::Type{F}, num::T, den::T, rm, prec) where {F,T} |
| 242 | + S = rational_to_float_promote_type(F, T) |
| 243 | + rational_to_floating_point_impl(F, S, num, den, rm, prec) |
| 244 | +end |
| 245 | + |
| 246 | +end |
0 commit comments