Skip to content

Commit addb493

Browse files
committed
BF - fix to casting for real binary128 format
Converting an int to real binary128 (available on s/390 - who knew?) revealed that simple two-pass split of the into into float64 wasn't enough because even the second part of the split could be not exactly represented in float64.
1 parent 1046c11 commit addb493

File tree

1 file changed

+30
-32
lines changed

1 file changed

+30
-32
lines changed

nibabel/casting.py

Lines changed: 30 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,27 @@ def int_to_float(val, flt_type):
251251
f : numpy scalar
252252
of type `flt_type`
253253
"""
254-
if not flt_type is np.longdouble:
255-
return flt_type(val)
256-
f64 = np.float64(val)
257-
res = val - int(f64)
258-
return np.longdouble(f64) + np.longdouble(res)
254+
if flt_type is np.longdouble:
255+
return _int2ld(val)
256+
return flt_type(val)
257+
258+
259+
def _int2ld(val):
260+
""" Convert int to long double
261+
262+
Why is this so complicated?
263+
264+
At least in numpy <= 1.6.1, numpy longdoubles do not correctly convert to
265+
ints, and ints do not correctly convert to longdoubles. Specifically, in
266+
both cases, the values seem to go through float64 conversion on the way, so
267+
to convert better, we need to split into float64s and sum up the result.
268+
"""
269+
faval = np.longdouble(0)
270+
while val != 0:
271+
f64 = np.float64(val)
272+
faval += f64
273+
val -= int(f64)
274+
return faval
259275

260276

261277
def floor_exact(val, flt_type):
@@ -299,33 +315,15 @@ def floor_exact(val, flt_type):
299315
flt_type = np.dtype(flt_type).type
300316
sign = val > 0 and 1 or -1
301317
aval = abs(val)
302-
if flt_type is np.longdouble:
303-
# longdouble seems to go through casting to float64, so getting the
304-
# value into float128 with the given precision needs to go through two
305-
# steps, first float64, then adding the remainder.
306-
f64 = floor_exact(aval, np.float64)
307-
i64 = int(f64)
308-
assert f64 == i64
309-
res = aval - i64
310-
try:
311-
faval = flt_type(i64) + flt_type(res)
312-
except OverflowError:
313-
faval = np.inf
314-
if faval == np.inf:
315-
return sign * np.finfo(flt_type).max
316-
if (faval - f64) <= res:
317-
# Float casting has made the value go down or stay the same
318-
return sign * faval
319-
else: # Normal case
320-
try:
321-
faval = flt_type(aval)
322-
except OverflowError:
323-
faval = np.inf
324-
if faval == np.inf:
325-
return sign * np.finfo(flt_type).max
326-
if int(faval) <= aval:
327-
# Float casting has made the value go down or stay the same
328-
return sign * faval
318+
try: # int_to_float deals with longdouble safely
319+
faval = int_to_float(aval, flt_type)
320+
except OverflowError:
321+
faval = np.inf
322+
if faval == np.inf:
323+
return sign * np.finfo(flt_type).max
324+
if as_int(faval) <= aval: # as_int deals with longdouble safely
325+
# Float casting has made the value go down or stay the same
326+
return sign * faval
329327
# Float casting made the value go up
330328
nmant = flt2nmant(flt_type)
331329
biggest_gap = 2**(floor_log2(aval) - nmant)

0 commit comments

Comments
 (0)