@@ -129,12 +129,301 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
# Convert the rational `x` to the integer type `T`. Only integer-valued
# rationals can be converted; anything else raises an `InexactError`.
function (::Type{T})(x::Rational) where {T<:Integer}
    isinteger(x) || throw(InexactError(nameof(T), T, x))
    return convert(T, x.num)::T
end
131131
132- AbstractFloat (x:: Rational ) = (float (x. num)/ float (x. den)):: AbstractFloat
133- function (:: Type{T} )(x:: Rational{S} ) where T<: AbstractFloat where S
134- P = promote_type (T,S)
135- convert (T, convert (P,x. num)/ convert (P,x. den)):: T
# Number of binary digits of `n`. Padding is disabled, so zero has a bit
# width of zero.
function bit_width(n)
    return ndigits(n; base = UInt8(2), pad = false)
end
133+
# Divide `num` by `2^n` using shifts only. Returns the tuple
# `(quotient, remainder)`, where the remainder is whatever is left after
# shifting the quotient back up by `n` bits.
function divrem_pow2(num::Integer, n::Integer)
    hi = num >> n
    lo = num - (hi << n)
    return (hi, lo)
end
139+
# Translate a rounding mode meant for a signed value into the mode to apply
# to that value's magnitude. The second argument carries the sign; only its
# sign bit is inspected.

# These modes are returned unchanged, whatever the sign.
function rounding_mode_translated_for_abs(
    rm::Union{
        RoundingMode{:ToZero},
        RoundingMode{:FromZero},
        RoundingMode{:Nearest},
        RoundingMode{:NearestTiesAway},
    },
    ::Real,
)
    return rm
end

# Rounding down: toward zero when the sign bit is clear, away from zero
# when it is set.
function rounding_mode_translated_for_abs(::RoundingMode{:Down}, sign::Real)
    return signbit(sign) ? RoundFromZero : RoundToZero
end

# Rounding up: away from zero when the sign bit is clear, toward zero when
# it is set.
function rounding_mode_translated_for_abs(::RoundingMode{:Up}, sign::Real)
    return signbit(sign) ? RoundToZero : RoundFromZero
end
156+
# Compute the binary floating-point components of the positive rational
# `num/den`, rounded to `requested_bit_width` bits of precision.
#
# `num`, `den` are positive integers. `requested_bit_width` is the
# requested floating-point precision. `T` is the integer type in which
# all of the arithmetic below is carried out; it needs to be wide enough
# that none of the shifts or products overflow.
#
# `romo` is expected to be one of `RoundToZero`, `RoundFromZero`,
# `RoundNearest` or `RoundNearestTiesAway`. No other mode triggers the
# round-up step below, so any other mode leaves the mantissa truncated.
#
# Returns the named tuple `(mantissa, exponent, is_exact)`: the rounded
# value is `mantissa * 2^exponent`, and `is_exact` tells whether the
# division left no remainder at the requested precision.
function rational_to_float_components_impl(
    num::Integer,
    den::Integer,
    requested_bit_width::Integer,
    ::Type{T},
    romo::RoundingMode,
) where {T<:Integer}
    num_bit_width = bit_width(num)
    den_bit_width = bit_width(den)

    # From here on only the widened copies are used, so that every
    # intermediate value has type `T` irrespective of the types of the
    # arguments.
    numT = T(num)
    denT = T(den)

    # Must be nonnegative: clamp to zero when the numerator alone already
    # provides more quotient bits than requested.
    bit_shift_1 = den_bit_width - num_bit_width + requested_bit_width
    (false ≤ bit_shift_1) || (bit_shift_1 = zero(bit_shift_1))

    # `T` must be wide enough to make overflow impossible during the
    # left shift here, we must not lose the high bits.
    #
    # Similarly, `scaled_num` must not be negative.
    scaled_num = numT << bit_shift_1

    # `quo0` must be at least `requested_bit_width` bits wide.
    (quo0, rem0) = divrem(scaled_num, denT, RoundToZero)
    quo0_bit_width = bit_width(quo0)

    # Now we have a mantissa in `quo0`, but need to round it to the
    # required precision.

    # Should often be zero, but never negative.
    bit_shift_2 = quo0_bit_width - requested_bit_width

    # `quo1` is `div(scaled_num, denT << bit_shift_2, RoundToZero)` and
    # should be exactly `requested_bit_width` bits wide.
    (quo1, rem1) = divrem_pow2(quo0, bit_shift_2)

    # `rem(scaled_num, denT << bit_shift_2, RoundToZero)`, but without the
    # extra computation.
    rem_total = rem1 * denT + rem0

    result_is_exact = iszero(rem_total)

    mantissa = quo1

    mantissa_carry = false

    romo_is_to = romo === RoundToZero
    romo_is_af = romo === RoundFromZero
    romo_is_ntte = romo === RoundNearest
    romo_is_ntaf = romo === RoundNearestTiesAway

    romo_is_to_nearest = romo_is_ntte | romo_is_ntaf

    if !result_is_exact & !romo_is_to
        # Finish the rounding.
        #
        # Compare `2 * rem_total` against `denT << bit_shift_2` (i.e. the
        # remainder against half of the divisor) without forming either
        # product: both sides are split into a high part and a
        # `bit_shift_2`-bit low part and compared piecewise.
        (rem_quo, rem_rem) = divrem_pow2(rem_total, bit_shift_2)
        to_nearest_compar_hi = rem_quo - (denT >> true)
        # Using `denT` (not `den`) matters here: the shift must happen in
        # the wide type, or the low bit of a narrow `den` could be shifted
        # out.
        to_nearest_compar_lo = (rem_rem << true) - ((denT & true) << bit_shift_2)
        to_nearest_compar_hi_iszero = iszero(to_nearest_compar_hi)
        to_nearest_compar_lo_iszero = iszero(to_nearest_compar_lo)
        to_nearest_is_greater_than_half =
            (false < to_nearest_compar_hi) |
            (to_nearest_compar_hi_iszero & (false < to_nearest_compar_lo))
        to_nearest_is_tied =
            to_nearest_compar_hi_iszero & to_nearest_compar_lo_iszero

        mantissa_is_even = iszero(mantissa & true)

        # True iff precision is one.
        mantissa_is_one = isone(mantissa)

        if (
            romo_is_af |
            (
                romo_is_to_nearest &
                (
                    to_nearest_is_greater_than_half |
                    (
                        to_nearest_is_tied &
                        !mantissa_is_one &
                        (romo_is_ntaf | (romo_is_ntte & !mantissa_is_even))
                    )
                )
            )
        )
            # Round up

            # Assuming `T` is wide enough and there's no overflow.
            mantissa += true

            # We need to decrease the bit width in case it increased.
            mantissa_carry = ispow2(mantissa)
            mantissa >>= mantissa_carry
        elseif romo_is_to_nearest & to_nearest_is_tied & mantissa_is_one
            # Mantissa is one, which means the precision is also
            # one. Be consistent with the `BigFloat` behavior, for
            # example: `BigFloat(3) == BigFloat(3.0) == 4`.
            mantissa_carry = true
        end
    end

    # `mantissa` should now again be exactly `requested_bit_width` bits
    # wide.

    exp = bit_shift_2 - bit_shift_1 + mantissa_carry

    return (
        mantissa = mantissa,
        exponent = exp,
        is_exact = result_is_exact,
    )
end
278+
# Floating-point components of the positive rational `num/den`.
#
# `num` and `den` are positive integers. `requested_bit_width` is the
# wanted floating-point precision and must be positive, otherwise an error
# is raised. `T` is the integer type used for most of the arithmetic and
# has to be wide enough.
#
# Returns a named tuple `(mantissa, exponent, is_exact)`.
function rational_to_float_components(
    num::Integer,
    den::Integer,
    requested_bit_width::Integer,
    ::Type{T},
    romo::RoundingMode,
) where {T<:Integer}
    if !(false < requested_bit_width)
        error(" nonpositive bit width")
    end

    # Strip the powers of two from both operands; they only contribute to
    # the exponent and are added back to it below.
    num_shift = trailing_zeros(num)
    den_shift = trailing_zeros(den)

    parts = rational_to_float_components_impl(
        num >> num_shift,
        den >> den_shift,
        requested_bit_width,
        T,
        romo,
    )

    return (
        mantissa = parts.mantissa,
        exponent = parts.exponent + num_shift - den_shift,
        is_exact = parts.is_exact,
    )
end
308+
# Conversion of a `Rational{Bool}`. Such a value can only be 0/0, 1/0, 0/1
# or 1/1, so no rounding is involved; the integer-type and precision
# arguments are ignored. `to_float` builds the result.
function rational_to_float_impl(
    to_float::C,
    ::Type{<:Integer},
    x::Rational{Bool},
    ::Integer,
) where {C<:Union{Type,Function}}
    n = numerator(x)

    # Nonzero denominator: n/1 = n
    iszero(denominator(x)) || return to_float(n)

    # Zero denominator: 0/0 is NaN, 1/0 is infinity
    return iszero(n) ? to_float(NaN) : to_float(Inf)
end
137327
# Turn a `Bool` into an `Int8`; any other integer is returned as is.
function to_int8_if_bool(n::Bool)
    return Int8(n)
end

function to_int8_if_bool(n::Integer)
    return n
end
330+
# Convert the rational `x` to a floating-point number built by `to_float`.
#
# Assuming the wanted rounding mode is round to nearest with ties to
# even.
#
# `T` is the integer type in which the computation is done; it must be
# able to represent the numerator, the denominator and the intermediate
# values derived from them. `requested_precision` must be positive.
#
# Special values: 0/0 gives `to_float(NaN)`, a nonzero numerator over a
# zero denominator gives an infinity carrying the numerator's sign, and a
# zero numerator gives `to_float(false)`.
function rational_to_float_impl(
    to_float::C,
    ::Type{T},
    x::Rational,
    requested_precision::Integer,
) where {C<:Union{Type,Function},T<:Integer}
    s = Int8(sign(numerator(x)))

    # Widen to `T` *before* taking the magnitude. Taking `abs` in the
    # rational's own integer type cannot represent the magnitude of a
    # `typemin` numerator (it overflows), whereas the wider `T` can.
    num = abs(T(numerator(x)))
    den = T(denominator(x))

    # Handle special cases
    if iszero(den)
        iszero(num) && return to_float(NaN)
        return to_float(s * Inf)
    end
    iszero(num) && return to_float(false)

    components = rational_to_float_components(
        num,
        den,
        requested_precision,
        T,
        RoundNearest,
    )
    mantissa = to_float(components.mantissa)

    # TODO: `ldexp` could be replaced with a mere bit of bit twiddling
    # in the case of `Float16`, `Float32`, `Float64`
    ret = ldexp(s * mantissa, components.exponent)

    # TODO: faster?
    if iszero(ret) | issubnormal(ret)
        # The result landed in the zero/subnormal range, where `ldexp`
        # rounds a second time. Redo the computation with truncation and
        # "rounding to odd" to prevent double rounding error, see
        # https://hal-ens-lyon.archives-ouvertes.fr/inria-00080427v2
        #
        # NOTE(review): rounding to odd is usually stated to need at least
        # two more bits in the intermediate result than in the final one;
        # results in the largest subnormal binade drop only one bit here.
        # Confirm with targeted tests that this case rounds correctly.
        components = rational_to_float_components(
            num,
            den,
            requested_precision,
            T,
            RoundToZero,
        )
        # Force the lowest bit to one whenever bits were discarded.
        mantissa = to_float(components.mantissa | !components.is_exact)

        # TODO: `ldexp` could be replaced with a mere bit of bit
        # twiddling in the case of `Float16`, `Float32`, `Float64`
        ret = ldexp(s * mantissa, components.exponent)
    end

    return ret
end
389+
# Select the integer type in which a rational with components of type `S`
# is converted to the floating-point type `F`.

# Fallback for any combination not listed below.
rational_to_float_promote_type(::Type{<:AbstractFloat}, ::Type{<:Integer}) = BigInt

# Unsigned component types are looked up through their signed counterpart.
function rational_to_float_promote_type(
    ::Type{F},
    ::Type{S},
) where {F<:AbstractFloat,S<:Unsigned}
    return rational_to_float_promote_type(F, signed(S))
end

# As an optimization, use a narrower type when possible.
rational_to_float_promote_type(::Type{Float16}, ::Type{<:Union{Int8,Int16}}) = Int32
rational_to_float_promote_type(::Type{Float32}, ::Type{<:Union{Int8,Int16}}) = Int64
rational_to_float_promote_type(::Type{Float64}, ::Type{<:Union{Int8,Int16}}) = Int128
rational_to_float_promote_type(::Type{<:Union{Float16,Float32}}, ::Type{Int32}) = Int64
rational_to_float_promote_type(::Type{Float64}, ::Type{Int32}) = Int128
rational_to_float_promote_type(
    ::Type{<:Union{Float16,Float32,Float64}},
    ::Type{Int64},
) = Int128
409+
# Convert the rational `x` to the floating-point type `F`, working at the
# precision reported by `precision(F)`.
function (::Type{F})(x::Rational) where {F<:AbstractFloat}
    # Common integer type of the two components of `x`.
    S = promote_type(typeof(numerator(x)), typeof(denominator(x)))

    # Integer type in which the conversion is carried out.
    W = rational_to_float_promote_type(F, S)

    return rational_to_float_impl(F, W, x, precision(F))::F
end
423+
# Convert a rational to the floating-point type that `float` associates
# with its type.
function AbstractFloat(x::Q) where {Q<:Rational}
    F = float(Q)
    return F(x)::AbstractFloat
end
426+
138427function Rational {T} (x:: AbstractFloat ) where T<: Integer
139428 r = rationalize (T, x, tol= 0 )
140429 x == convert (typeof (x), r) || throw (InexactError (:Rational , Rational{T}, x))
0 commit comments