@@ -129,12 +129,220 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
129129(:: Type{T} )(x:: Rational ) where {T<: Integer } = (isinteger (x) ? convert (T, x. num):: T :
130130 throw (InexactError (nameof (T), T, x)))
131131
132- AbstractFloat (x:: Rational ) = (float (x. num)/ float (x. den)):: AbstractFloat
133- function (:: Type{T} )(x:: Rational{S} ) where T<: AbstractFloat where S
134- P = promote_type (T,S)
135- convert (T, convert (P,x. num)/ convert (P,x. den)):: T
132+ bit_width (n) = ndigits (n, base = UInt8 (2 ), pad = false )
133+
134+ function divrem_pow2 (num:: Integer , n:: Integer )
135+ quot = num >> n
136+ rema = num - (quot << n)
137+ (quot, rema)
138+ end
139+
140+ const RoundingModesIndependentFromSign = Union{
141+ RoundingMode{:ToZero }, RoundingMode{:FromZero },
142+ RoundingMode{:Nearest }, RoundingMode{:NearestTiesAway }}
143+
144+ rounding_mode_translated_for_abs (rm:: RoundingModesIndependentFromSign , :: Real ) = rm
145+
146+ function rounding_mode_translated_for_abs (:: RoundingMode{:Down} , sign:: Real )
147+ ! signbit (sign) ? RoundToZero : RoundFromZero
148+ end
149+
150+ function rounding_mode_translated_for_abs (:: RoundingMode{:Up} , sign:: Real )
151+ ! signbit (sign) ? RoundFromZero : RoundToZero
152+ end
153+
154+ clamped_to_zero (x) = max (zero (x), x)
155+
156+ struct FloatComponentsResult{S<: Integer ,T<: Integer }
157+ mantissa:: S
158+ exponent:: T
159+ inexact:: Bool
160+ underflowed:: Bool
161+
162+ function FloatComponentsResult (m:: S , e:: T ;
163+ inexact:: Bool ,
164+ underflowed:: Bool ) where {S<: Integer ,T<: Integer }
165+ new {S,T} (m, e, inexact, underflowed)
166+ end
167+ end
168+
169+ # `num`, `den` are positive integers. `requested_precision` is the
170+ # requested floating-point precision. `T` is the integer type that
171+ # we'll be working with mostly, it needs to be wide enough.
172+ # `exp_lower_bound` is the minimum allowed normalized exponent for
173+ # normal numbers.
174+ function rational_to_float_components_impl (num:: Integer ,
175+ den:: Integer ,
176+ requested_precision:: Integer ,
177+ :: Type{T} ,
178+ romo:: RoundingMode ,
179+ exp_lower_bound:: Union{Nothing,Integer} ) where
180+ {T<: Integer }
181+ num_bit_width = bit_width (num)
182+ den_bit_width = bit_width (den)
183+
184+ # `T` must be wide enough to avoid overflow.
185+ numT = T (num)
186+ denT = T (den)
187+
188+ # Creates a mantissa in `quo_` that's at least
189+ # `requested_precision` bits wide.
190+ bit_shift_num_ = clamped_to_zero (den_bit_width - num_bit_width + requested_precision)
191+ quo_ = div (numT << bit_shift_num_, den, RoundToZero)
192+
193+ # Nonnegative. Experiments indicate that, when iterating over all
194+ # possible numerators and denominators below some bit width, with
195+ # some fixed value for `requested_precision`, the maximal attained
196+ # value will be
197+ # `max(1, max(num_bit_width, den_bit_width) - requested_precision)`.
198+ # So in the worst case we have `requested_precision == 1` and
199+ # `excess_precision == max(1, max(num_bit_width, den_bit_width) - 1)`
200+ excess_precision = bit_width (quo_) - requested_precision
201+
202+ # Normalized exponent
203+ nexp = bit_width (quo_) - bit_shift_num_ - true
204+
205+ # Take possible underflow into account: if there's underflow, the
206+ # precision needs to be less than requested.
207+ adjusted_precision = requested_precision
208+ if ! isnothing (exp_lower_bound)
209+ underflow = clamped_to_zero (exp_lower_bound - nexp)
210+ adjusted_precision -= underflow
211+ end
212+ adjusted_excess_precision = bit_width (quo_) - adjusted_precision
213+
214+ (adjusted_precision < false ) && return FloatComponentsResult (
215+ zero (quo_), zero (nexp), inexact = true , underflowed = true )
216+
217+ # Creates a mantissa in `quo` that's exactly `adjusted_precision`
218+ # bits wide, except if rounding up happens, in which case the bit
219+ # width may be `adjusted_precision + 1`.
220+ bit_shift_num = clamped_to_zero (bit_shift_num_ - adjusted_excess_precision)
221+ bit_shift_den = adjusted_excess_precision - (bit_shift_num_ - bit_shift_num) # nonnegative
222+ (quo, rem) = divrem (numT << bit_shift_num, denT << bit_shift_den, romo)
223+
224+ result_is_exact = iszero (rem)
225+
226+ nexp_final = nexp + (adjusted_precision < bit_width (quo))
227+ is_subnormal = ! isnothing (exp_lower_bound) && (nexp_final < exp_lower_bound)
228+
229+ iszero (quo) || (quo >>= trailing_zeros (quo))
230+
231+ # The bit width of `quo` is now less than or equal to
232+ # `adjusted_precision`, except if `adjusted_precision` is zero, in
233+ # which case `bit_width(quo)` may be one, in case rounding up
234+ # happened.
235+
236+ FloatComponentsResult (
237+ quo, nexp_final, inexact = ! result_is_exact, underflowed = is_subnormal)
238+ end
239+
240+ # `num`, `den` are positive integers. `bit_width` is the requested
241+ # floating-point precision and must be positive. `T` is the integer
242+ # type that we'll be working with mostly, it needs to be wide enough.
243+ # `exp_lower_bound` is the minimum allowed normalized exponent for
244+ # normal numbers.
245+ function rational_to_float_components (num:: Integer ,
246+ den:: Integer ,
247+ bit_width:: Integer ,
248+ :: Type{T} ,
249+ romo:: RoundingMode ,
250+ exp_lower_bound:: Union{Nothing,Integer} =
251+ nothing ) where {T<: Integer }
252+ h (:: Nothing , :: Integer ) = nothing
253+ h (a:: Integer , b:: Integer ) = a - b
254+
255+ # Factor out powers of two
256+ tz_num = trailing_zeros (num)
257+ tz_den = trailing_zeros (den)
258+ dexp = tz_num - tz_den
259+ c = rational_to_float_components_impl (
260+ num >> tz_num, den >> tz_den, bit_width, T, romo, h (exp_lower_bound, dexp))
261+ FloatComponentsResult (
262+ c. mantissa, c. exponent + dexp, inexact = c. inexact, underflowed = c. underflowed)
263+ end
264+
265+ # Assuming the wanted rounding mode is round to nearest with ties to
266+ # even.
267+ #
268+ # `precision` must be positive.
269+ function rational_to_float_impl (to_float:: C ,
270+ :: Type{T} ,
271+ x:: Rational ,
272+ precision:: Integer ) where {C<: Union{Type,Function} ,T<: Integer }
273+ s = Int8 (sign (numerator (x)))
274+ a = abs (x)
275+
276+ num = numerator (a)
277+ den = denominator (a)
278+
279+ # Handle special cases
280+ num_is_zero = iszero (num)
281+ den_is_zero = iszero (den)
282+ if den_is_zero
283+ num_is_zero && return to_float (NaN ) # 0/0
284+ return to_float (s * Inf ) # n/0 = sign(n) * Inf
285+ end
286+ num_is_zero && return to_float (false ) # 0/n = 0
287+
288+ F = typeof (to_float (false ))
289+ c = rational_to_float_components (
290+ num, den, precision, T, RoundNearest, min_normal_number_exponent (F))
291+ bw = bit_width (c. mantissa)
292+
293+ # TODO : `ldexp` could be replaced with a mere bit of bit twiddling
294+ # in the case of `Float16`, `Float32`, `Float64`
295+ ldexp (s * to_float (c. mantissa), c. exponent - bw + true ):: F
136296end
137297
298+ function rational_to_float_promote_type (:: Type{F} ,
299+ :: Type{S} ) where {F<: AbstractFloat ,S<: Integer }
300+ BigInt
301+ end
302+
303+ function rational_to_float_promote_type (:: Type{F} ,
304+ :: Type{S} ) where {F<: AbstractFloat ,S<: Unsigned }
305+ rational_to_float_promote_type (F, widen (signed (S)))
306+ end
307+
308+ # As an optimization, use a narrower type when possible.
309+ rational_to_float_promote_type (:: Type{Float16} , :: Type{<:Union{Int8,Int16}} ) = Int32
310+ rational_to_float_promote_type (:: Type{Float32} , :: Type{<:Union{Int8,Int16}} ) = Int64
311+ rational_to_float_promote_type (:: Type{Float64} , :: Type{<:Union{Int8,Int16}} ) = Int128
312+ rational_to_float_promote_type (:: Type{<:Union{Float16,Float32}} , :: Type{Int32} ) = Int64
313+ rational_to_float_promote_type (:: Type{Float64} , :: Type{Int32} ) = Int128
314+ rational_to_float_promote_type (:: Type{<:Union{Float16,Float32,Float64}} , :: Type{Int64} ) = Int128
315+
316+ function rational_bool_to_float (to_float:: C ,
317+ x:: Rational{Bool} ) where {C<: Union{Type,Function} }
318+ n = numerator (x)
319+ if iszero (denominator (x))
320+ if iszero (n)
321+ to_float (NaN ) # 0/0
322+ else
323+ to_float (Inf ) # 1/0
324+ end
325+ else
326+ # n/1 = n
327+ to_float (n)
328+ end
329+ end
330+
331+ function rational_to_float (:: Type{F} , x:: Rational{Bool} ) where {F<: AbstractFloat }
332+ rational_bool_to_float (F, x):: F
333+ end
334+
335+ function rational_to_float (:: Type{F} , x:: Rational{T} ) where {F<: AbstractFloat ,T<: Integer }
336+ rational_to_float_impl (F, rational_to_float_promote_type (F, T), x, precision (F)):: F
337+ end
338+
339+ function (:: Type{F} )(x:: Rational ) where {F<: AbstractFloat }
340+ y = Rational (promote (numerator (x), denominator (x))... )
341+ rational_to_float (F, y):: F
342+ end
343+
344+ AbstractFloat (x:: Q ) where {Q<: Rational } = float (Q)(x):: AbstractFloat
345+
138346function Rational {T} (x:: AbstractFloat ) where T<: Integer
139347 r = rationalize (T, x, tol= 0 )
140348 x == convert (typeof (x), r) || throw (InexactError (:Rational , Rational{T}, x))
0 commit comments