@@ -129,12 +129,182 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
129129(:: Type{T} )(x:: Rational ) where {T<: Integer } = (isinteger (x) ? convert (T, x. num):: T :
130130 throw (InexactError (nameof (T), T, x)))
131131
132- AbstractFloat (x:: Rational ) = (float (x. num)/ float (x. den)):: AbstractFloat
133- function (:: Type{T} )(x:: Rational{S} ) where T<: AbstractFloat where S
134- P = promote_type (T,S)
135- convert (T, convert (P,x. num)/ convert (P,x. den)):: T
132+ bit_width (n) = ndigits (n, base = UInt8 (2 ), pad = false )
133+
134+ const RoundingModeIndependentFromSign = Union{
135+ RoundingMode{:ToZero }, RoundingMode{:FromZero },
136+ RoundingMode{:Nearest }, RoundingMode{:NearestTiesAway }}
137+
138+ clamped_to_zero (x) = max (zero (x), x)
139+
140+ struct FloatComponentsResult{S<: Integer ,T<: Integer }
141+ mantissa:: S
142+ exponent:: T
143+ inexact:: Bool
144+ underflowed:: Bool
145+
146+ function FloatComponentsResult (m:: S , e:: T ;
147+ inexact:: Bool ,
148+ underflowed:: Bool ) where {S<: Integer ,T<: Integer }
149+ new {S,T} (m, e, inexact, underflowed)
150+ end
151+ end
152+
153+ # `num`, `den` are positive integers. `requested_precision` is the
154+ # requested floating-point precision. `T` is the integer type that
155+ # we'll be working with mostly, it needs to be wide enough.
156+ # `exp_lower_bound` is the minimum allowed normalized exponent for
157+ # normal numbers.
158+ function rational_to_float_components_impl (num:: Integer ,
159+ den:: Integer ,
160+ requested_precision:: Integer ,
161+ :: Type{T} ,
162+ romo:: RoundingModeIndependentFromSign ,
163+ exp_lower_bound:: Union{Nothing,Integer} ) where {T<: Integer }
164+ num_bit_width = bit_width (num)
165+ den_bit_width = bit_width (den)
166+
167+ # `T` must be wide enough to avoid overflow.
168+ numT = T (num)
169+ denT = T (den)
170+
171+ # Creates a mantissa in `quo_` that's at least
172+ # `requested_precision` bits wide.
173+ bit_shift_num_ = clamped_to_zero (den_bit_width - num_bit_width + requested_precision)
174+ quo_ = div (numT << bit_shift_num_, den, RoundToZero)
175+
176+ # Normalized exponent
177+ nexp = bit_width (quo_) - bit_shift_num_ - true
178+
179+ # Take possible underflow into account: if there's underflow, the
180+ # precision needs to be less than requested.
181+ adjusted_precision = requested_precision
182+ if ! isnothing (exp_lower_bound)
183+ underflow = clamped_to_zero (exp_lower_bound - nexp)
184+ adjusted_precision -= underflow
185+ end
186+
187+ # Nonnegative. Experiments indicate that, when iterating over all
188+ # possible numerators and denominators below some bit width, with
189+ # some fixed value for `adjusted_precision`, the maximal attained
190+ # value will be
191+ # `max(1, max(num_bit_width, den_bit_width) - adjusted_precision)`.
192+ # So in the worst case we have `adjusted_precision == 1` and
193+ # `excess_precision == max(1, max(num_bit_width, den_bit_width) - 1)`
194+ excess_precision = bit_width (quo_) - adjusted_precision
195+
196+ (adjusted_precision < 0 ) && return FloatComponentsResult (
197+ zero (quo_), zero (nexp), inexact = true , underflowed = true )
198+
199+ # Creates a mantissa in `quo` that's exactly `adjusted_precision`
200+ # bits wide, except if rounding up happens, in which case the bit
201+ # width may be `adjusted_precision + 1`.
202+ bit_shift_num = clamped_to_zero (bit_shift_num_ - excess_precision)
203+ bit_shift_den = excess_precision - (bit_shift_num_ - bit_shift_num) # nonnegative
204+ (quo, rem) = divrem (numT << bit_shift_num, denT << bit_shift_den, romo)
205+
206+ result_is_exact = iszero (rem)
207+
208+ nexp_final = nexp + (adjusted_precision < bit_width (quo))
209+ is_subnormal = ! isnothing (exp_lower_bound) && (nexp_final < exp_lower_bound)
210+
211+ iszero (quo) || (quo >>= trailing_zeros (quo))
212+
213+ # The bit width of `quo` is now less than or equal to
214+ # `adjusted_precision`, except if `adjusted_precision` is zero, in
215+ # which case `bit_width(quo)` may be one, in case rounding up
216+ # happened.
217+
218+ FloatComponentsResult (
219+ quo, nexp_final, inexact = ! result_is_exact, underflowed = is_subnormal)
220+ end
221+
222+ # `num`, `den` are positive integers. `bit_width` is the requested
223+ # floating-point precision and must be positive. `T` is the integer
224+ # type that we'll be working with mostly, it needs to be wide enough.
225+ # `exp_lower_bound` is the minimum allowed normalized exponent for
226+ # normal numbers.
227+ function rational_to_float_components (num:: Integer ,
228+ den:: Integer ,
229+ bit_width:: Integer ,
230+ :: Type{T} ,
231+ romo:: RoundingModeIndependentFromSign ,
232+ exp_lower_bound:: Union{Nothing,Integer} = nothing ) where {T<: Integer }
233+ # Factor out powers of two
234+ tz_num = trailing_zeros (num)
235+ tz_den = trailing_zeros (den)
236+ dexp = tz_num - tz_den
237+ exp_lb = isnothing (exp_lower_bound) ? nothing : exp_lower_bound - dexp
238+ c = rational_to_float_components_impl (
239+ num >> tz_num, den >> tz_den, bit_width, T, romo, exp_lb)
240+ FloatComponentsResult (
241+ c. mantissa, c. exponent + dexp, inexact = c. inexact, underflowed = c. underflowed)
242+ end
243+
244+ # Assuming the wanted rounding mode is round to nearest with ties to
245+ # even.
246+ #
247+ # `precision` must be positive.
248+ function rational_to_float_impl (to_float:: C ,
249+ :: Type{T} ,
250+ x:: Rational ,
251+ precision:: Integer ) where {C<: Union{Type,Function} ,T<: Integer }
252+ s = Int8 (sign (numerator (x)))
253+ a = abs (x)
254+
255+ num = numerator (a)
256+ den = denominator (a)
257+
258+ # Handle special cases
259+ num_is_zero = iszero (num)
260+ den_is_zero = iszero (den)
261+ if den_is_zero
262+ num_is_zero && return to_float (NaN ) # 0/0
263+ return to_float (s * Inf ) # n/0 = sign(n) * Inf
264+ end
265+ num_is_zero && return to_float (false ) # 0/n = 0
266+
267+ F = typeof (to_float (false ))
268+ c = rational_to_float_components (
269+ num, den, precision, T, RoundNearest, min_normal_number_exponent (F))
270+ bw = bit_width (c. mantissa)
271+
272+ # TODO : `ldexp` could be replaced with a mere bit of bit twiddling
273+ # in the case of `Float16`, `Float32`, `Float64`
274+ ldexp (s * to_float (c. mantissa), c. exponent - bw + true ):: F
136275end
137276
277+ function rational_to_float_promote_type (:: Type{F} ,
278+ :: Type{S} ) where {F<: AbstractFloat ,S<: Integer }
279+ BigInt
280+ end
281+
282+ function rational_to_float_promote_type (:: Type{F} ,
283+ :: Type{S} ) where {F<: AbstractFloat ,S<: Unsigned }
284+ rational_to_float_promote_type (F, widen (signed (S)))
285+ end
286+
287+ # As an optimization, use a narrower type when possible.
288+ rational_to_float_promote_type (:: Type{Float16} , :: Type{<:Union{Int8,Int16}} ) = Int32
289+ rational_to_float_promote_type (:: Type{Float32} , :: Type{<:Union{Int8,Int16}} ) = Int64
290+ rational_to_float_promote_type (:: Type{Float64} , :: Type{<:Union{Int8,Int16}} ) = Int128
291+ rational_to_float_promote_type (:: Type{<:Union{Float16,Float32}} , :: Type{Int32} ) = Int64
292+ rational_to_float_promote_type (:: Type{Float64} , :: Type{Int32} ) = Int128
293+ rational_to_float_promote_type (:: Type{<:Union{Float16,Float32,Float64}} , :: Type{Int64} ) = Int128
294+
295+ rational_to_float (:: Type{F} , x:: Rational{Bool} ) where {F<: AbstractFloat } = F (numerator (x)/ denominator (x))
296+
297+ function rational_to_float (:: Type{F} , x:: Rational{T} ) where {F<: AbstractFloat ,T<: Integer }
298+ rational_to_float_impl (F, rational_to_float_promote_type (F, T), x, precision (F)):: F
299+ end
300+
301+ function (:: Type{F} )(x:: Rational ) where {F<: AbstractFloat }
302+ y = unsafe_rational (numerator (x), denominator (x))
303+ rational_to_float (F, y):: F
304+ end
305+
306+ AbstractFloat (x:: Q ) where {Q<: Rational } = float (Q)(x):: AbstractFloat
307+
138308function Rational {T} (x:: AbstractFloat ) where T<: Integer
139309 r = rationalize (T, x, tol= 0 )
140310 x == convert (typeof (x), r) || throw (InexactError (:Rational , Rational{T}, x))
0 commit comments