@@ -129,12 +129,300 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
129129(:: Type{T} )(x:: Rational ) where {T<: Integer } = (isinteger (x) ? convert (T, x. num):: T :
130130 throw (InexactError (nameof (T), T, x)))
131131
132- AbstractFloat (x:: Rational ) = (float (x. num)/ float (x. den)):: AbstractFloat
133- function (:: Type{T} )(x:: Rational{S} ) where T<: AbstractFloat where S
134- P = promote_type (T,S)
135- convert (T, convert (P,x. num)/ convert (P,x. den)):: T
132+ bit_width (n) = ndigits (n, base = UInt8 (2 ), pad = false )
133+
134+ function divrem_pow2 (num:: Integer , n:: Integer )
135+ quot = num >> n
136+ rema = num - (quot << n)
137+ (quot, rema)
138+ end
139+
140+ rounding_mode_translated_for_abs (
141+ rm:: Union {
142+ RoundingMode{:ToZero },
143+ RoundingMode{:FromZero },
144+ RoundingMode{:Nearest },
145+ RoundingMode{:NearestTiesAway },
146+ },
147+ :: Real ,
148+ ) =
149+ rm
150+
151+ rounding_mode_translated_for_abs (:: RoundingMode{:Down} , sign:: Real ) =
152+ ! signbit (sign) ? RoundToZero : RoundFromZero
153+
154+ rounding_mode_translated_for_abs (:: RoundingMode{:Up} , sign:: Real ) =
155+ ! signbit (sign) ? RoundFromZero : RoundToZero
156+
157+ # `num`, `den` are positive integers. `requested_bit_width` is the
158+ # requested floating-point precision. `T` is the integer type that
159+ # we'll be working with mostly, it needs to be wide enough.
160+ function rational_to_float_components_impl (
161+ num:: Integer ,
162+ den:: Integer ,
163+ requested_bit_width:: Integer ,
164+ :: Type{T} ,
165+ romo:: RoundingMode ,
166+ ) where {T<: Integer }
167+ num_bit_width = bit_width (num)
168+ den_bit_width = bit_width (den)
169+
170+ numT = T (num)
171+ denT = T (den)
172+
173+ # Must be positive.
174+ bit_shift_1 = den_bit_width - num_bit_width + requested_bit_width
175+ (false ≤ bit_shift_1) || (bit_shift_1 = zero (bit_shift_1))
176+
177+ # `T` must be wide enough to make overflow impossible during the
178+ # left shift here, we must not lose the high bits.
179+ #
180+ # Similarly, `scaled_num` must not be negative.
181+ scaled_num = numT << bit_shift_1
182+
183+ # `quo0` must be at least `requested_bit_width` bits wide.
184+ (quo0, rem0) = divrem (scaled_num, denT, RoundToZero)
185+ quo0_bit_width = bit_width (quo0)
186+
187+ # Now we have a mantissa in `quo0`, but need to round it to the
188+ # required precision.
189+
190+ # Should often be zero, but never negative.
191+ bit_shift_2 = quo0_bit_width - requested_bit_width
192+
193+ # `quo1` is `div(scaled_num, denT << bit_shift_2, RoundToZero)` and should
194+ # be exactly `requested_bit_width` bits wide.
195+ (quo1, rem1) = divrem_pow2 (quo0, bit_shift_2)
196+
197+ # `rem(scaled_num, denT << bit_shift_2, RoundToZero)`, but without the extra
198+ # computation.
199+ rem_total = rem1 * den + rem0
200+
201+ result_is_exact = iszero (rem_total)
202+
203+ mantissa = quo1
204+
205+ mantissa_carry = false
206+
207+ romo_is_to = romo == RoundToZero
208+ romo_is_af = romo == RoundFromZero
209+ romo_is_ntte = romo == RoundNearest
210+ romo_is_ntaf = romo == RoundNearestTiesAway
211+
212+ romo_is_to_nearest = romo_is_ntte | romo_is_ntaf
213+
214+ if ! result_is_exact & ! romo_is_to
215+ # Finish the rounding
216+
217+ (rem_quo, rem_rem) = divrem_pow2 (rem_total, bit_shift_2)
218+ to_nearest_compar_hi = rem_quo - (den >> true )
219+ to_nearest_compar_lo = (rem_rem << true ) - ((den & true ) << bit_shift_2)
220+ to_nearest_compar_hi_iszero = iszero (to_nearest_compar_hi)
221+ to_nearest_compar_lo_iszero = iszero (to_nearest_compar_lo)
222+ to_nearest_is_greater_than_half =
223+ (false < to_nearest_compar_hi) |
224+ (
225+ to_nearest_compar_hi_iszero &
226+ (false < to_nearest_compar_lo)
227+ )
228+ to_nearest_is_tied =
229+ to_nearest_compar_hi_iszero &
230+ to_nearest_compar_lo_iszero
231+
232+ mantissa_is_even = iszero (mantissa & true )
233+
234+ # True iff precision is one.
235+ mantissa_is_one = isone (mantissa)
236+
237+ if (
238+ romo_is_af |
239+ (
240+ romo_is_to_nearest &
241+ (
242+ to_nearest_is_greater_than_half |
243+ (
244+ to_nearest_is_tied &
245+ ! mantissa_is_one &
246+ (romo_is_ntaf | (romo_is_ntte & ! mantissa_is_even))
247+ )
248+ )
249+ )
250+ )
251+ # Round up
252+
253+ # Assuming `T` is wide enough and there's no overflow.
254+ mantissa += true
255+
256+ # We need to decrease the bit width in case it increased.
257+ mantissa_carry = ispow2 (mantissa)
258+ mantissa >>= mantissa_carry
259+ elseif romo_is_to_nearest & to_nearest_is_tied & mantissa_is_one
260+ # Mantissa is one, which means the precision is also
261+ # one. Be consistent with the `BigFloat` behavior, for
262+ # example: `BigFloat(3) == BigFloat(3.0) == 4`.
263+ mantissa_carry = true
264+ end
265+ end
266+
267+ # `mantissa` should now again be exactly `requested_bit_width` bits
268+ # wide.
269+
270+ exp = bit_shift_2 - bit_shift_1 + mantissa_carry
271+
272+ (
273+ mantissa = mantissa,
274+ exponent = exp,
275+ is_exact = result_is_exact,
276+ )
277+ end
278+
279+ # `num`, `den` are positive integers. `requested_bit_width` is the
280+ # requested floating-point precision. `T` is the integer type that
281+ # we'll be working with mostly, it needs to be wide enough.
282+ function rational_to_float_components (
283+ num:: Integer ,
284+ den:: Integer ,
285+ requested_bit_width:: Integer ,
286+ :: Type{T} ,
287+ romo:: RoundingMode ,
288+ ) where {T<: Integer }
289+ (false < requested_bit_width) || error (" nonpositive bit width" )
290+
291+ # Factor out powers of two
292+ trailing_zeros_num = trailing_zeros (num)
293+ trailing_zeros_den = trailing_zeros (den)
294+ num >>= trailing_zeros_num
295+ den >>= trailing_zeros_den
296+
297+ c = rational_to_float_components_impl (
298+ num, den, requested_bit_width, T, romo,
299+ )
300+
301+ (
302+ mantissa = c. mantissa,
303+ exponent = c. exponent + trailing_zeros_num - trailing_zeros_den,
304+ is_exact = c. is_exact,
305+ )
306+ end
307+
308+ function rational_to_float_impl (
309+ to_float:: C ,
310+ :: Type{<:Integer} ,
311+ x:: Rational{Bool} ,
312+ :: Integer ,
313+ ) where {C<: Union{Type,Function} }
314+ n = numerator (x)
315+ if iszero (denominator (x))
316+ if iszero (n)
317+ to_float (NaN ) # 0/0
318+ else
319+ to_float (Inf ) # 1/0
320+ end
321+ else
322+ # n/1 = n
323+ to_float (n)
324+ end
136325end
137326
327+ to_int8_if_bool (n:: Bool ) = Int8 (n)
328+ to_int8_if_bool (n:: Integer ) = n
329+
330+ # Assuming the wanted rounding mode is round to nearest with ties to
331+ # even.
332+ #
333+ # `requested_precision` must be positive.
334+ function rational_to_float_impl (
335+ to_float:: C ,
336+ :: Type{T} ,
337+ x:: Rational ,
338+ requested_precision:: Integer ,
339+ ) where {C<: Union{Type,Function} ,T<: Integer }
340+ s = Int8 (sign (numerator (x)))
341+ a = abs (x)
342+
343+ num = to_int8_if_bool (numerator (a))
344+ den = to_int8_if_bool (denominator (a))
345+
346+ # Handle special cases
347+ num_is_zero = iszero (num)
348+ den_is_zero = iszero (den)
349+ if den_is_zero
350+ num_is_zero && return to_float (NaN )
351+ return to_float (s * Inf )
352+ end
353+ num_is_zero && return to_float (false )
354+
355+ components = rational_to_float_components (
356+ num,
357+ den,
358+ requested_precision,
359+ T,
360+ RoundNearest,
361+ )
362+ mantissa = to_float (components. mantissa)
363+
364+ # TODO : `ldexp` could be replaced with a mere bit of bit twiddling
365+ # in the case of `Float16`, `Float32`, `Float64`
366+ ret = ldexp (s * mantissa, components. exponent)
367+
368+ # TODO : faster?
369+ if iszero (ret) | issubnormal (ret)
370+ # "Rounding to odd" to prevent double rounding error, see
371+ # https://hal-ens-lyon.archives-ouvertes.fr/inria-00080427v2
372+ components = rational_to_float_components (
373+ num,
374+ den,
375+ requested_precision,
376+ T,
377+ RoundToZero,
378+ )
379+ mantissa = to_float (components. mantissa | ! components. is_exact)
380+
381+ # TODO : `ldexp` could be replaced with a mere bit of bit
382+ # twiddling in the case of `Float16`, `Float32`, `Float64`
383+ ret = ldexp (s * mantissa, components. exponent)
384+ end
385+
386+ ret
387+ end
388+
389+ rational_to_float_promote_type (
390+ :: Type{F} ,
391+ :: Type{S} ,
392+ ) where {F<: AbstractFloat ,S<: Integer } =
393+ BigInt
394+
395+ rational_to_float_promote_type (
396+ :: Type{F} ,
397+ :: Type{S} ,
398+ ) where {F<: AbstractFloat ,S<: Unsigned } =
399+ rational_to_float_promote_type (F, signed (S))
400+
401+ # As an optimization, use a narrower type when possible.
402+ rational_to_float_promote_type (:: Type{Float16} , :: Type{<:Union{Int8,Int16}} ) = Int32
403+ rational_to_float_promote_type (:: Type{Float32} , :: Type{<:Union{Int8,Int16}} ) = Int64
404+ rational_to_float_promote_type (:: Type{Float64} , :: Type{<:Union{Int8,Int16}} ) = Int128
405+ rational_to_float_promote_type (:: Type{<:Union{Float16,Float32}} , :: Type{Int32} ) = Int64
406+ rational_to_float_promote_type (:: Type{Float64} , :: Type{Int32} ) = Int128
407+ rational_to_float_promote_type (:: Type{<:Union{Float16,Float32,Float64}} , :: Type{Int64} ) = Int128
408+
409+ (:: Type{F} )(x:: Rational ) where {F<: AbstractFloat } =
410+ rational_to_float_impl (
411+ F,
412+ rational_to_float_promote_type (
413+ F,
414+ promote_type (
415+ typeof (numerator (x)),
416+ typeof (denominator (x)),
417+ ),
418+ ),
419+ x,
420+ precision (F),
421+ ):: F
422+
423+ AbstractFloat (x:: Q ) where {Q<: Rational } =
424+ float (Q)(x):: AbstractFloat
425+
138426function Rational {T} (x:: AbstractFloat ) where T<: Integer
139427 r = rationalize (T, x, tol= 0 )
140428 x == convert (typeof (x), r) || throw (InexactError (:Rational , Rational{T}, x))
0 commit comments