Skip to content

Commit 5639ac6

Browse files
committed
Base: correctly rounded floats constructed from rationals
Constructing a floating-point number from a `Rational` should now be correctly rounded. Implementation approach: 1. Convert the (numerator, denominator) pair to a (sign bit, integral significand, exponent) triplet using integer arithmetic. The integer type in question must be wide enough. 2. Convert the above triplet into an instance of the chosen FP type. There is special support for IEEE 754 floating-point and for `BigFloat`, otherwise a fallback using `ldexp` is used. As a bonus, constructing a `BigFloat` from a `Rational` should now be thread-safe when the rounding mode and precision are provided to the constructor, because there is no access to the global precision or rounding mode settings. Updates #45213 Updates #50940 Updates #52507 Fixes #52394 Closes #52395 Fixes #52859
1 parent 25c8128 commit 5639ac6

File tree

8 files changed

+782
-20
lines changed

8 files changed

+782
-20
lines changed

base/Base.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ include("rounding.jl")
284284
include("div.jl")
285285
include("rawbigints.jl")
286286
include("float.jl")
287+
include("rational_to_float.jl")
287288
include("twiceprecision.jl")
288289
include("complex.jl")
289290
include("rational.jl")

base/docs/basedocs.jl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3049,6 +3049,25 @@ julia> 4/2
30493049
julia> 4.5/2
30503050
2.25
30513051
```
3052+
3053+
This function may convert integer arguments to a floating-point number type
3054+
([`AbstractFloat`](@ref)), potentially resulting in a loss of accuracy. To avoid this,
3055+
instead construct a [`Rational`](@ref) from the arguments, then convert the resulting
3056+
rational number to a specific floating-point type of your choice:
3057+
3058+
```jldoctest
3059+
julia> n = 100000000000000000
3060+
100000000000000000
3061+
3062+
julia> m = n + 6
3063+
100000000000000006
3064+
3065+
julia> n/m
3066+
1.0
3067+
3068+
julia> Float64(n//m) # `//` constructs a `Rational`
3069+
0.9999999999999999
3070+
```
30523071
"""
30533072
/(x, y)
30543073

base/mpfr.jl

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ import
1919
isone, big, _string_n, decompose, minmax, _precision_with_base_2,
2020
sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand,
2121
uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask,
22-
RawBigIntRoundingIncrementHelper, truncated, RawBigInt
22+
RawBigIntRoundingIncrementHelper, truncated, RawBigInt, RationalToFloat,
23+
rational_to_floating_point
2324

2425

2526
using .Base.Libc
@@ -317,12 +318,51 @@ BigFloat(x::Union{UInt8,UInt16,UInt32}, r::MPFRRoundingMode=rounding_raw(BigFloa
317318
BigFloat(x::Union{Float16,Float32}, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat)) =
318319
BigFloat(Float64(x), r; precision=precision)
319320

320-
function BigFloat(x::Rational, r::MPFRRoundingMode=rounding_raw(BigFloat); precision::Integer=_precision_with_base_2(BigFloat))
321-
setprecision(BigFloat, precision) do
322-
setrounding_raw(BigFloat, r) do
323-
BigFloat(numerator(x))::BigFloat / BigFloat(denominator(x))::BigFloat
321+
function set_2exp!(z::BigFloat, n::BigInt, exp::Int, rm::MPFRRoundingMode)
322+
ccall(
323+
(:mpfr_set_z_2exp, libmpfr),
324+
Int32,
325+
(Ref{BigFloat}, Ref{BigInt}, Int, MPFRRoundingMode),
326+
z, n, exp, rm,
327+
)
328+
nothing
329+
end
330+
331+
function RationalToFloat.to_floating_point_impl(::Type{BigFloat}, ::Type{BigInt}, num, den, romo, prec)
332+
num_is_zero = iszero(num)
333+
den_is_zero = iszero(den)
334+
s = Int8(sign(num))
335+
sb = signbit(s)
336+
is_zero = num_is_zero & !den_is_zero
337+
is_inf = !num_is_zero & den_is_zero
338+
is_regular = !num_is_zero & !den_is_zero
339+
340+
if is_regular
341+
let rtfc = RationalToFloat.to_float_components
342+
c = rtfc(BigInt, num, den, prec, nothing, romo, sb)
343+
ret = BigFloat(precision = prec)
344+
mpfr_romo = convert(MPFRRoundingMode, romo)
345+
set_2exp!(ret, s * c.integral_significand, Int(c.exponent - prec + true), mpfr_romo)
346+
ret
324347
end
325-
end
348+
else
349+
if is_zero
350+
BigFloat(false, MPFRRoundToZero, precision = prec)
351+
elseif is_inf
352+
BigFloat(s * Inf, MPFRRoundToZero, precision = prec)
353+
else
354+
BigFloat(precision = prec)
355+
end
356+
end::BigFloat
357+
end
358+
359+
function BigFloat(x::Rational, r::RoundingMode; precision::Integer = DEFAULT_PRECISION[])
360+
rational_to_floating_point(BigFloat, x, r, precision)
361+
end
362+
363+
function BigFloat(x::Rational, r::MPFRRoundingMode = ROUNDING_MODE[];
364+
precision::Integer = DEFAULT_PRECISION[])
365+
rational_to_floating_point(BigFloat, x, r, precision)
326366
end
327367

328368
function tryparse(::Type{BigFloat}, s::AbstractString; base::Integer=0, precision::Integer=_precision_with_base_2(BigFloat), rounding::MPFRRoundingMode=rounding_raw(BigFloat))

base/rational.jl

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -151,19 +151,19 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
151151
(::Type{T})(x::Rational) where {T<:Integer} = (isinteger(x) ? convert(T, x.num)::T :
152152
throw(InexactError(nameof(T), T, x)))
153153

154-
AbstractFloat(x::Rational) = (float(x.num)/float(x.den))::AbstractFloat
155-
function (::Type{T})(x::Rational{S}) where T<:AbstractFloat where S
156-
P = promote_type(T,S)
157-
convert(T, convert(P,x.num)/convert(P,x.den))::T
158-
end
159-
# avoid spurious overflow (#52394). (Needed for UInt16 or larger;
160-
# we also include Int16 for consistency of accuracy.)
161-
Float16(x::Rational{<:Union{Int16,Int32,Int64,UInt16,UInt32,UInt64}}) =
162-
Float16(Float32(x))
163-
Float16(x::Rational{<:Union{Int128,UInt128}}) =
164-
Float16(Float64(x)) # UInt128 overflows Float32, include Int128 for consistency
165-
Float32(x::Rational{<:Union{Int128,UInt128}}) =
166-
Float32(Float64(x)) # UInt128 overflows Float32, include Int128 for consistency
154+
function rational_to_floating_point(::Type{F}, x, rm, prec) where {F}
155+
nd = (numerator(x), denominator(x))
156+
RationalToFloat.to_floating_point(F, nd..., rm, prec)::F
157+
end
158+
159+
function (::Type{F})(x::Rational, rm::RoundingMode = RoundNearest) where {F<:AbstractFloat}
160+
rational_to_floating_point(F, x, rm, precision(F))::F
161+
end
162+
163+
function AbstractFloat(x::Q) where {Q<:Rational}
164+
T = float(Q)
165+
T(x)::T::AbstractFloat
166+
end
167167

168168
function Rational{T}(x::AbstractFloat) where T<:Integer
169169
r = rationalize(T, x, tol=0)

base/rational_to_float.jl

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
# This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
module RationalToFloat
4+
5+
# Performance optimization. Unlike raw `<<` or `>>>`, this is supposed
6+
# to compile to a single instruction, because the semantics correspond
7+
# to what hardware usually provides.
8+
function machine_shift(shift::S, a::T, b) where {S,T<:Base.BitInteger}
9+
@inline begin
10+
mask = 8*sizeof(T) - 1
11+
c = b & mask
12+
shift(a, c)
13+
end
14+
end
15+
16+
machine_shift(::S, a::Bool, ::Any) where {S} = error("unsupported")
17+
18+
# Fallback for `BigInt` etc.
19+
machine_shift(shift::S, a, b) where {S} = shift(a, b)
20+
21+
# Arguments are positive integers.
22+
function div_significand_with_remainder(num, den, minimum_significand_size)
23+
clamped = x -> max(zero(x), x)::typeof(x)
24+
bw = Base.top_set_bit # bit width
25+
shift = clamped(minimum_significand_size + bw(den) - bw(num) + 0x2)
26+
t = machine_shift(<<, num, shift)
27+
(divrem(t, den, RoundToZero)..., shift)
28+
end
29+
30+
# `divrem(n, 1<<k, RoundToZero)`
31+
function divrem_2(n, k)
32+
quo = machine_shift(>>>, n, k)
33+
tmp = machine_shift(<<, quo, k)
34+
rem = n - tmp
35+
(quo, rem)
36+
end
37+
38+
function to_float_components_impl(num, den, precision, max_subnormal_exp)
39+
# `+1` because we need an extra, "round", bit for some rounding modes.
40+
#
41+
# TODO: as a performance optimization, only do this when required
42+
# by the rounding mode
43+
prec_p_1 = precision + true
44+
45+
(quo0, rem0, shift) = div_significand_with_remainder(num, den, prec_p_1)
46+
width = Base.top_set_bit(quo0)
47+
excess_width = width - prec_p_1
48+
exp = width - shift - true
49+
50+
exp_underflow = if isnothing(max_subnormal_exp)
51+
zero(exp)
52+
else
53+
let d = max_subnormal_exp - exp, T = typeof(d), z = zero(d)::T
54+
(signbit(d) ? z : d + true)::T
55+
end
56+
end
57+
58+
(quo1, rem1) = divrem_2(quo0, exp_underflow + excess_width)
59+
integral_significand = quo1 >>> true
60+
round_bit = quo1 % Bool
61+
sticky_bit = !iszero(rem1) | !iszero(rem0)
62+
63+
(; integral_significand, exponent = exp, round_bit, sticky_bit)
64+
end
65+
66+
struct RoundingIncrementHelper
67+
final_bit::Bool
68+
round_bit::Bool
69+
sticky_bit::Bool
70+
end
71+
72+
(h::RoundingIncrementHelper)(::Base.Rounding.FinalBit) = h.final_bit
73+
(h::RoundingIncrementHelper)(::Base.Rounding.RoundBit) = h.round_bit
74+
(h::RoundingIncrementHelper)(::Base.Rounding.StickyBit) = h.sticky_bit
75+
76+
function to_float_components_rounded(num, den, precision, max_subnormal_exp, romo, sign_bit)
77+
overflows = (x, p) -> x == machine_shift(<<, one(x), p)
78+
t = to_float_components_impl(num, den, precision, max_subnormal_exp)
79+
raw_significand = t.integral_significand
80+
rh = RoundingIncrementHelper(raw_significand % Bool, t.round_bit, t.sticky_bit)
81+
incr = Base.Rounding.correct_rounding_requires_increment(rh, romo, sign_bit)
82+
rounded = raw_significand + incr
83+
(integral_significand, exponent) = let exp = t.exponent
84+
if overflows(rounded, precision)
85+
(rounded >>> true, exp + true)
86+
else
87+
(rounded, exp)
88+
end
89+
end
90+
(; integral_significand, exponent)
91+
end
92+
93+
function to_float_components(::Type{T}, num, den, precision, max_subnormal_exp, romo, sb) where {T}
94+
to_float_components_rounded(abs(T(num)), den, precision, max_subnormal_exp, romo, sb)
95+
end
96+
97+
function to_floating_point_fallback(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S}
98+
num_is_zero = iszero(num)
99+
den_is_zero = iszero(den)
100+
sb = signbit(num)
101+
is_zero = num_is_zero & !den_is_zero
102+
is_inf = !num_is_zero & den_is_zero
103+
is_regular = !num_is_zero & !den_is_zero
104+
if is_regular
105+
let
106+
c = to_float_components(S, num, den, prec, nothing, rm, sb)
107+
signif = T(c.integral_significand)::T
108+
x = ldexp(signif, c.exponent - prec + true)::T
109+
sb ? -x : x
110+
end::T
111+
else
112+
if is_zero
113+
zero(T)::T
114+
elseif is_inf
115+
T(Inf)::T
116+
else
117+
T(NaN)::T
118+
end
119+
end::T
120+
end
121+
122+
function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S}
123+
to_floating_point_fallback(T, S, num, den, rm, prec)
124+
end
125+
126+
function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T<:Base.IEEEFloat,S}
127+
num_is_zero = iszero(num)
128+
den_is_zero = iszero(den)
129+
sb = signbit(num)
130+
is_zero = num_is_zero & !den_is_zero
131+
is_inf = !num_is_zero & den_is_zero
132+
is_regular = !num_is_zero & !den_is_zero
133+
Rnd = Base.Rounding
134+
(rm_is_to_zero, rm_is_from_zero) = if Rnd.rounds_to_nearest(rm)
135+
(false, false)
136+
else
137+
let from = Rnd.rounds_away_from_zero(rm, sb)
138+
(!from, from)
139+
end
140+
end::NTuple{2,Bool}
141+
exp_max = Base.exponent_max(T)
142+
exp_min = Base.exponent_min(T)
143+
ieee_repr = Base.ieee754_representation
144+
repr_zero = ieee_repr(T, sb, Val(:zero))
145+
repr_inf = ieee_repr(T, sb, Val(:inf))
146+
repr_nan = ieee_repr(T, sb, Val(:nan))
147+
U = typeof(repr_zero)
148+
repr_zero::U
149+
repr_inf::U
150+
repr_nan::U
151+
152+
ret_u = if is_regular
153+
let
154+
c = to_float_components(S, num, den, prec, exp_min - 1, rm, sb)
155+
exp = c.exponent
156+
exp_diff = exp - exp_min
157+
is_normal = 0 exp_diff
158+
exp_is_huge_p = exp_max < exp
159+
exp_is_huge_n = signbit(exp_diff + prec)
160+
rounds_to_inf = exp_is_huge_p & !rm_is_to_zero
161+
rounds_to_zero = exp_is_huge_n & !rm_is_from_zero
162+
163+
if !rounds_to_zero & !exp_is_huge_p
164+
let signif = (c.integral_significand % U) & Base.significand_mask(T)
165+
exp_field = (max(exp_diff, zero(exp_diff)) + is_normal) % U
166+
ieee_repr(T, sb, exp_field, signif)::U
167+
end
168+
elseif rounds_to_zero
169+
repr_zero
170+
elseif rounds_to_inf
171+
repr_inf
172+
else
173+
ieee_repr(T, sb, Val(:omega))
174+
end
175+
end
176+
else
177+
if is_zero
178+
repr_zero
179+
elseif is_inf
180+
repr_inf
181+
else
182+
repr_nan
183+
end
184+
end::U
185+
186+
reinterpret(T, ret_u)::T
187+
end
188+
189+
# `BigInt` is a safe default.
190+
to_float_promote_type(::Type{F}, ::Type{S}) where {F,S} = BigInt
191+
192+
const BitIntegerOrBool = Union{Bool,Base.BitInteger}
193+
194+
# As an optimization, use an integer type narrower than `BigInt` when possible.
195+
function to_float_promote_type(::Type{F}, ::Type{S}) where {F<:Base.IEEEFloat,S<:BitIntegerOrBool}
196+
Max = if sizeof(F) sizeof(S)
197+
S
198+
else
199+
(S <: Signed) ? Base.inttype(F) : Base.uinttype(F)
200+
end
201+
widen(Max)
202+
end
203+
204+
function to_floating_point(::Type{F}, num::T, den::T, rm, prec) where {F,T}
205+
S = to_float_promote_type(F, T)
206+
to_floating_point_impl(F, S, num, den, rm, prec)
207+
end
208+
209+
end

test/choosetests.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ const TESTNAMES = [
1111
"char", "strings", "triplequote", "unicode", "intrinsics",
1212
"dict", "hashing", "iobuffer", "staged", "offsetarray",
1313
"arrayops", "tuple", "reduce", "reducedim", "abstractarray",
14-
"intfuncs", "simdloop", "vecelement", "rational",
14+
"intfuncs", "simdloop", "vecelement", "rational", "rational_to_float",
1515
"bitarray", "copy", "math", "fastmath", "functional", "iterators",
1616
"operators", "ordering", "path", "ccall", "parse", "loading", "gmp",
1717
"sorting", "spawn", "backtrace", "exceptions",

0 commit comments

Comments
 (0)