@@ -129,12 +129,183 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
129129(:: Type{T} )(x:: Rational ) where {T<: Integer } = (isinteger (x) ? convert (T, x. num):: T :
130130 throw (InexactError (nameof (T), T, x)))
131131
132- AbstractFloat (x:: Rational ) = (float (x. num)/ float (x. den)):: AbstractFloat
133- function (:: Type{T} )(x:: Rational{S} ) where T<: AbstractFloat where S
134- P = promote_type (T,S)
135- convert (T, convert (P,x. num)/ convert (P,x. den)):: T
132+ bit_width (n) = ndigits (n, base = UInt8 (2 ), pad = false )
133+
134+ const RoundingModeIndependentFromSign = Union{
135+ RoundingMode{:ToZero }, RoundingMode{:FromZero },
136+ RoundingMode{:Nearest }, RoundingMode{:NearestTiesAway }}
137+
138+ clamped_to_zero (x) = max (zero (x), x)
139+
140+ struct FloatComponentsResult{S<: Integer ,T<: Integer }
141+ mantissa:: S
142+ exponent:: T
143+ inexact:: Bool
144+ underflowed:: Bool
145+
146+ function FloatComponentsResult (m:: S , e:: T ;
147+ inexact:: Bool ,
148+ underflowed:: Bool ) where {S<: Integer ,T<: Integer }
149+ new {S,T} (m, e, inexact, underflowed)
150+ end
151+ end
152+
153+ # `num`, `den` are positive integers. `requested_precision` is the
154+ # requested floating-point precision. `T` is the integer type that
155+ # we'll be working with mostly, it needs to be wide enough.
156+ # `exp_lower_bound` is the minimum allowed normalized exponent for
157+ # normal numbers.
158+ function rational_to_float_components_impl (num:: Integer ,
159+ den:: Integer ,
160+ requested_precision:: Integer ,
161+ :: Type{T} ,
162+ romo:: RoundingModeIndependentFromSign ,
163+ exp_lower_bound:: Union{Nothing,Integer} ) where {T<: Integer }
164+ num_bit_width = bit_width (num)
165+ den_bit_width = bit_width (den)
166+
167+ # `T` must be wide enough to avoid overflow.
168+ numT = T (num)
169+ denT = T (den)
170+
171+ # Creates a mantissa in `quo_` that's at least
172+ # `requested_precision` bits wide.
173+ bit_shift_num_ = clamped_to_zero (den_bit_width - num_bit_width + requested_precision)
174+ quo_ = div (numT << bit_shift_num_, den, RoundToZero)
175+
176+ # Nonnegative. Experiments indicate that, when iterating over all
177+ # possible numerators and denominators below some bit width, with
178+ # some fixed value for `requested_precision`, the maximal attained
179+ # value will be
180+ # `max(1, max(num_bit_width, den_bit_width) - requested_precision)`.
181+ # So in the worst case we have `requested_precision == 1` and
182+ # `excess_precision == max(1, max(num_bit_width, den_bit_width) - 1)`
183+ excess_precision = bit_width (quo_) - requested_precision
184+
185+ # Normalized exponent
186+ nexp = bit_width (quo_) - bit_shift_num_ - true
187+
188+ # Take possible underflow into account: if there's underflow, the
189+ # precision needs to be less than requested.
190+ adjusted_precision = requested_precision
191+ if ! isnothing (exp_lower_bound)
192+ underflow = clamped_to_zero (exp_lower_bound - nexp)
193+ adjusted_precision -= underflow
194+ end
195+ adjusted_excess_precision = bit_width (quo_) - adjusted_precision
196+
197+ (adjusted_precision < 0 ) && return FloatComponentsResult (
198+ zero (quo_), zero (nexp), inexact = true , underflowed = true )
199+
200+ # Creates a mantissa in `quo` that's exactly `adjusted_precision`
201+ # bits wide, except if rounding up happens, in which case the bit
202+ # width may be `adjusted_precision + 1`.
203+ bit_shift_num = clamped_to_zero (bit_shift_num_ - adjusted_excess_precision)
204+ bit_shift_den = adjusted_excess_precision - (bit_shift_num_ - bit_shift_num) # nonnegative
205+ (quo, rem) = divrem (numT << bit_shift_num, denT << bit_shift_den, romo)
206+
207+ result_is_exact = iszero (rem)
208+
209+ nexp_final = nexp + (adjusted_precision < bit_width (quo))
210+ is_subnormal = ! isnothing (exp_lower_bound) && (nexp_final < exp_lower_bound)
211+
212+ iszero (quo) || (quo >>= trailing_zeros (quo))
213+
214+ # The bit width of `quo` is now less than or equal to
215+ # `adjusted_precision`, except if `adjusted_precision` is zero, in
216+ # which case `bit_width(quo)` may be one, in case rounding up
217+ # happened.
218+
219+ FloatComponentsResult (
220+ quo, nexp_final, inexact = ! result_is_exact, underflowed = is_subnormal)
221+ end
222+
223+ # `num`, `den` are positive integers. `bit_width` is the requested
224+ # floating-point precision and must be positive. `T` is the integer
225+ # type that we'll be working with mostly, it needs to be wide enough.
226+ # `exp_lower_bound` is the minimum allowed normalized exponent for
227+ # normal numbers.
228+ function rational_to_float_components (num:: Integer ,
229+ den:: Integer ,
230+ bit_width:: Integer ,
231+ :: Type{T} ,
232+ romo:: RoundingModeIndependentFromSign ,
233+ exp_lower_bound:: Union{Nothing,Integer} = nothing ) where {T<: Integer }
234+ # Factor out powers of two
235+ tz_num = trailing_zeros (num)
236+ tz_den = trailing_zeros (den)
237+ dexp = tz_num - tz_den
238+ exp_lb = isnothing (exp_lower_bound) ? nothing : exp_lower_bound - dexp
239+ c = rational_to_float_components_impl (
240+ num >> tz_num, den >> tz_den, bit_width, T, romo, exp_lb)
241+ FloatComponentsResult (
242+ c. mantissa, c. exponent + dexp, inexact = c. inexact, underflowed = c. underflowed)
243+ end
244+
245+ # Assuming the wanted rounding mode is round to nearest with ties to
246+ # even.
247+ #
248+ # `precision` must be positive.
249+ function rational_to_float_impl (to_float:: C ,
250+ :: Type{T} ,
251+ x:: Rational ,
252+ precision:: Integer ) where {C<: Union{Type,Function} ,T<: Integer }
253+ s = Int8 (sign (numerator (x)))
254+ a = abs (x)
255+
256+ num = numerator (a)
257+ den = denominator (a)
258+
259+ # Handle special cases
260+ num_is_zero = iszero (num)
261+ den_is_zero = iszero (den)
262+ if den_is_zero
263+ num_is_zero && return to_float (NaN ) # 0/0
264+ return to_float (s * Inf ) # n/0 = sign(n) * Inf
265+ end
266+ num_is_zero && return to_float (false ) # 0/n = 0
267+
268+ F = typeof (to_float (false ))
269+ c = rational_to_float_components (
270+ num, den, precision, T, RoundNearest, min_normal_number_exponent (F))
271+ bw = bit_width (c. mantissa)
272+
273+ # TODO : `ldexp` could be replaced with a mere bit of bit twiddling
274+ # in the case of `Float16`, `Float32`, `Float64`
275+ ldexp (s * to_float (c. mantissa), c. exponent - bw + true ):: F
136276end
137277
278+ function rational_to_float_promote_type (:: Type{F} ,
279+ :: Type{S} ) where {F<: AbstractFloat ,S<: Integer }
280+ BigInt
281+ end
282+
283+ function rational_to_float_promote_type (:: Type{F} ,
284+ :: Type{S} ) where {F<: AbstractFloat ,S<: Unsigned }
285+ rational_to_float_promote_type (F, widen (signed (S)))
286+ end
287+
288+ # As an optimization, use a narrower type when possible.
289+ rational_to_float_promote_type (:: Type{Float16} , :: Type{<:Union{Int8,Int16}} ) = Int32
290+ rational_to_float_promote_type (:: Type{Float32} , :: Type{<:Union{Int8,Int16}} ) = Int64
291+ rational_to_float_promote_type (:: Type{Float64} , :: Type{<:Union{Int8,Int16}} ) = Int128
292+ rational_to_float_promote_type (:: Type{<:Union{Float16,Float32}} , :: Type{Int32} ) = Int64
293+ rational_to_float_promote_type (:: Type{Float64} , :: Type{Int32} ) = Int128
294+ rational_to_float_promote_type (:: Type{<:Union{Float16,Float32,Float64}} , :: Type{Int64} ) = Int128
295+
296+ rational_to_float (:: Type{F} , x:: Rational{Bool} ) where {F<: AbstractFloat } = F (numerator (x)/ denominator (x))
297+
298+ function rational_to_float (:: Type{F} , x:: Rational{T} ) where {F<: AbstractFloat ,T<: Integer }
299+ rational_to_float_impl (F, rational_to_float_promote_type (F, T), x, precision (F)):: F
300+ end
301+
302+ function (:: Type{F} )(x:: Rational ) where {F<: AbstractFloat }
303+ y = unsafe_rational (numerator (x), denominator (x))
304+ rational_to_float (F, y):: F
305+ end
306+
307+ AbstractFloat (x:: Q ) where {Q<: Rational } = float (Q)(x):: AbstractFloat
308+
138309function Rational {T} (x:: AbstractFloat ) where T<: Integer
139310 r = rationalize (T, x, tol= 0 )
140311 x == convert (typeof (x), r) || throw (InexactError (:Rational , Rational{T}, x))
0 commit comments