Skip to content

Commit cb7d446

Browse files
authored
Improve floating-point Euclidean division for Float16 and Float32 (#49637)
1 parent fbbce04 commit cb7d446

File tree

2 files changed

+27
-0
lines changed

2 files changed

+27
-0
lines changed

base/div.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,3 +381,9 @@ end
381381
# NOTE: C89 fmod() and x87 FPREM implicitly provide truncating float division,
382382
# so it is used here as the basis of float div().
383383
# Generic floating-point `div` under rounding mode `r`: remove the remainder
# that `rem(x, y, r)` reports, divide what is left by `y`, round that quotient
# to an integer value, and return it as a `T`.
function div(x::T, y::T, r::RoundingMode) where {T<:AbstractFloat}
    remainder = rem(x, y, r)
    quotient = (x - remainder) / y
    return convert(T, round(quotient))
end
384+
385+
# Vincent Lefèvre: "The Euclidean Division Implemented with a Floating-Point Division and a Floor"
386+
# https://inria.hal.science/inria-00070403
387+
# Theorem 1 implies that the following are exact if eps(x/y) <= 1
388+
# Float32 specialization: perform the division in Float64, round the wide
# quotient according to `r`, then convert the result back to Float32.
function div(x::Float32, y::Float32, r::RoundingMode)
    wide_quotient = Float64(x) / Float64(y)
    return Float32(round(wide_quotient, r))
end
389+
# Float16 specialization: perform the division in Float32, round the wide
# quotient according to `r`, then convert the result back to Float16.
function div(x::Float16, y::Float16, r::RoundingMode)
    wide_quotient = Float32(x) / Float32(y)
    return Float16(round(wide_quotient, r))
end

test/numbers.jl

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1770,6 +1770,27 @@ end
17701770
@test cld(-1.1, 0.1) == div(-1.1, 0.1, RoundUp) == ceil(big(-1.1)/big(0.1)) == -11.0
17711771
@test fld(-1.1, 0.1) == div(-1.1, 0.1, RoundDown) == floor(big(-1.1)/big(0.1)) == -12.0
17721772
end
1773+
# Tests labelled "issue #49450": each case calls `div`, `fld`, or `cld` with an
# integer dividend and a Float16 or Float32 divisor and compares against the
# expected quotient with `===`, so the result must match in both value and type.
@testset "issue #49450" begin
1774+
@test div(514, Float16(0.75)) === Float16(685)
1775+
@test fld(514, Float16(0.75)) === Float16(685)
1776+
@test cld(515, Float16(0.75)) === Float16(687)
1777+
1778+
@test cld(1, Float16(0.000999)) === Float16(1001)
1779+
@test cld(2, Float16(0.001999)) === Float16(1001)
1780+
@test cld(3, Float16(0.002934)) === Float16(1023)
1781+
@test cld(4, Float16(0.003998)) === Float16(1001)
1782+
@test fld(5, Float16(0.004925)) === Float16(1015)
1783+
1784+
@test div(4_194_307, Float32(0.75)) === Float32(5_592_409)
1785+
@test fld(4_194_307, Float32(0.75)) === Float32(5_592_409)
1786+
@test cld(4_194_308, Float32(0.75)) === Float32(5_592_411)
1787+
1788+
@test fld(5, Float32(6.556511e-7)) === Float32(7_626_007)
1789+
@test fld(10, Float32(1.3113022e-6)) === Float32(7_626_007)
1790+
@test fld(11, Float32(1.4305115e-6)) === Float32(7_689_557)
1791+
@test cld(16, Float32(2.8014183e-6)) === Float32(5_711_393)
1792+
@test cld(17, Float32(2.2053719e-6)) === Float32(7_708_451)
1793+
end
17731794
end
17741795
@testset "return types" begin
17751796
for T in (Int8,Int16,Int32,Int64,Int128, UInt8,UInt16,UInt32,UInt64,UInt128)

0 commit comments

Comments
 (0)