Skip to content

Commit 5dba096

Browse files
authored
backport fix float16(::Union{Float64,BigFloat}) (JuliaLang#43092)
1 parent ee261dc commit 5dba096

File tree

3 files changed

+57
-4
lines changed

3 files changed

+57
-4
lines changed

base/mpfr.jl

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,23 @@ Float64(x::BigFloat, r::RoundingMode) = Float64(x, convert(MPFRRoundingMode, r))
337337
Float32(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) =
338338
_cpynansgn(ccall((:mpfr_get_flt,:libmpfr), Float32, (Ref{BigFloat}, MPFRRoundingMode), x, r), x)
339339
Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r))
340-
341-
# TODO: avoid double rounding
342-
Float16(x::BigFloat) = Float16(Float64(x))
340+
function Float16(x::BigFloat) :: Float16
341+
res = Float32(x)
342+
resi = reinterpret(UInt32, res)
343+
if (resi&0x7fffffff) < 0x38800000 # if Float16(res) is subnormal
344+
#shift so that the mantissa lines up where it would for normal Float16
345+
shift = 113-((resi & 0x7f800000)>>23)
346+
if shift<23
347+
resi |= 0x0080_0000 # set implicit bit
348+
resi >>= shift
349+
end
350+
end
351+
if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values
352+
# adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
353+
res = nextfloat(res, cmp(x, res))
354+
end
355+
return res
356+
end
343357

344358
promote_rule(::Type{BigFloat}, ::Type{<:Real}) = BigFloat
345359
promote_rule(::Type{BigInt}, ::Type{<:AbstractFloat}) = BigFloat

src/intrinsics.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1652,7 +1652,24 @@ extern "C" JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param)
16521652

16531653
extern "C" JL_DLLEXPORT uint16_t __truncdfhf2(double param)
16541654
{
1655-
return float_to_half((float)param);
1655+
float res = (float)param;
1656+
uint32_t resi;
1657+
memcpy(&resi, &res, sizeof(res));
1658+
if ((resi&0x7fffffffu) < 0x38800000u){ // if Float16(res) is subnormal
1659+
// shift so that the mantissa lines up where it would for normal Float16
1660+
uint32_t shift = 113u-((resi & 0x7f800000u)>>23u);
1661+
if (shift<23u) {
1662+
resi |= 0x00800000; // set implicit bit
1663+
resi >>= shift;
1664+
}
1665+
}
1666+
if ((resi & 0x1fffu) == 0x1000u) { // if we are halfway between 2 Float16 values
1667+
memcpy(&resi, &res, sizeof(res));
1668+
// adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
1669+
resi += (fabs(res) < fabs(param)) - (fabs(param) < fabs(res));
1670+
memcpy(&res, &resi, sizeof(res));
1671+
}
1672+
return float_to_half(res);
16561673
}
16571674

16581675
#endif

test/float16.jl

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,25 @@ const minsubf16_32 = Float32(minsubf16)
199199

200200
# issues #33076
201201
@test Float16(1f5) == Inf16
202+
203+
@testset "conversion to Float16 from" begin
204+
for T in (Float32, Float64, BigFloat)
205+
@testset "conversion from $T" begin
206+
for i in 1:2^16
207+
f = reinterpret(Float16, UInt16(i-1))
208+
isfinite(f) || continue
209+
if f < 0
210+
epsdown = T(eps(f))/2
211+
epsup = issubnormal(f) ? epsdown : T(eps(nextfloat(f)))/2
212+
else
213+
epsup = T(eps(f))/2
214+
epsdown = issubnormal(f) ? epsup : T(eps(prevfloat(f)))/2
215+
end
216+
@test isequal(f*(-1)^(f === Float16(0)), Float16(nextfloat(T(f) - epsdown)))
217+
@test isequal(f*(-1)^(f === -Float16(0)), Float16(prevfloat(T(f) + epsup)))
218+
@test isequal(prevfloat(f), Float16(prevfloat(T(f) - epsdown)))
219+
@test isequal(nextfloat(f), Float16(nextfloat(T(f) + epsup)))
220+
end
221+
end
222+
end
223+
end

0 commit comments

Comments
 (0)