Skip to content

Commit 627ecd4

Browse files
committed
Merge pull request #81 from matthew-brett/float-128-cast-fix
Float 128 cast fix for platform(s) with actual IEEE binary128 I hadn't previously considered precision that high.
2 parents 6e8de04 + d123ccd commit 627ecd4

File tree

2 files changed

+91
-49
lines changed

2 files changed

+91
-49
lines changed

nibabel/casting.py

Lines changed: 34 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,8 @@ def as_int(x, check=True):
192192
This is useful because the numpy int(val) mechanism is broken for large
193193
values in np.longdouble.
194194
195-
This routine will still break for values that are outside the range of
196-
float64.
195+
This routine will still raise an OverflowError for values that are outside
196+
the range of float64.
197197
198198
Parameters
199199
----------
@@ -220,24 +220,31 @@ def as_int(x, check=True):
220220
>>> as_int(2.1, check=False)
221221
2
222222
"""
223-
ix = int(x)
224-
if ix == x:
225-
return ix
223+
x = np.array(x, copy=True)
226224
fx = np.floor(x)
227225
if check and fx != x:
228226
raise FloatingError('Not an integer: %s' % x)
229-
f64 = np.float64(fx)
230-
i64 = int(f64)
231-
assert f64 == i64
232-
res = fx - f64
233-
return ix + int(res)
227+
if not fx.dtype.type == np.longdouble:
228+
return int(x)
229+
# Subtract float64 chunks until we have all of the number. If the int is too
230+
# large, it will overflow
231+
ret = 0
232+
while fx != 0:
233+
f64 = np.float64(fx)
234+
fx -= f64
235+
ret += int(f64)
236+
return ret
234237

235238

236239
def int_to_float(val, flt_type):
237240
""" Convert integer `val` to floating point type `flt_type`
238241
239-
Useful because casting to ``np.longdouble`` loses precision as it appears to
240-
go through casting to np.float64.
242+
Why is this so complicated?
243+
244+
At least in numpy <= 1.6.1, numpy longdoubles do not correctly convert to
245+
ints, and ints do not correctly convert to longdoubles. Specifically, in
246+
both cases, the values seem to go through float64 conversion on the way, so
247+
to convert better, we need to split into float64s and sum up the result.
241248
242249
Parameters
243250
----------
@@ -253,9 +260,12 @@ def int_to_float(val, flt_type):
253260
"""
254261
if not flt_type is np.longdouble:
255262
return flt_type(val)
256-
f64 = np.float64(val)
257-
res = val - int(f64)
258-
return np.longdouble(f64) + np.longdouble(res)
263+
faval = np.longdouble(0)
264+
while val != 0:
265+
f64 = np.float64(val)
266+
faval += f64
267+
val -= int(f64)
268+
return faval
259269

260270

261271
def floor_exact(val, flt_type):
@@ -299,33 +309,15 @@ def floor_exact(val, flt_type):
299309
flt_type = np.dtype(flt_type).type
300310
sign = val > 0 and 1 or -1
301311
aval = abs(val)
302-
if flt_type is np.longdouble:
303-
# longdouble seems to go through casting to float64, so getting the
304-
# value into float128 with the given precision needs to go through two
305-
# steps, first float64, then adding the remainder.
306-
f64 = floor_exact(aval, np.float64)
307-
i64 = int(f64)
308-
assert f64 == i64
309-
res = aval - i64
310-
try:
311-
faval = flt_type(i64) + flt_type(res)
312-
except OverflowError:
313-
faval = np.inf
314-
if faval == np.inf:
315-
return sign * np.finfo(flt_type).max
316-
if (faval - f64) <= res:
317-
# Float casting has made the value go down or stay the same
318-
return sign * faval
319-
else: # Normal case
320-
try:
321-
faval = flt_type(aval)
322-
except OverflowError:
323-
faval = np.inf
324-
if faval == np.inf:
325-
return sign * np.finfo(flt_type).max
326-
if int(faval) <= aval:
327-
# Float casting has made the value go down or stay the same
328-
return sign * faval
312+
try: # int_to_float deals with longdouble safely
313+
faval = int_to_float(aval, flt_type)
314+
except OverflowError:
315+
faval = np.inf
316+
if faval == np.inf:
317+
return sign * np.finfo(flt_type).max
318+
if as_int(faval) <= aval: # as_int deals with longdouble safely
319+
# Float casting has made the value go down or stay the same
320+
return sign * faval
329321
# Float casting made the value go up
330322
nmant = flt2nmant(flt_type)
331323
biggest_gap = 2**(floor_log2(aval) - nmant)

nibabel/tests/test_floating.py

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
"""
33
import numpy as np
44

5-
from ..casting import floor_exact, flt2nmant, as_int, FloatingError
5+
from ..casting import (floor_exact, flt2nmant, as_int, FloatingError,
6+
int_to_float, floor_log2)
67

78
from nose import SkipTest
89
from nose.tools import assert_equal, assert_raises
@@ -36,15 +37,64 @@ def test_as_int():
3637
assert_equal(as_int(-2.1, False), -2)
3738
v = np.longdouble(2**64)
3839
assert_equal(as_int(v), 2**64)
39-
# Have all long doubles got this precision? Windows 32-bit longdouble
40-
# appears to have 52 bit precision, but we avoid that by checking for known
41-
# precisions that are less than that required
40+
# Have all long doubles got 63+1 binary bits of precision? Windows 32-bit
41+
# longdouble appears to have 52 bit precision, but we avoid that by checking
42+
# for known precisions that are less than that required
4243
try:
4344
nmant = flt2nmant(np.longdouble)
4445
except FloatingError:
45-
nmant = None # Unknown precision, test and hope
46-
if nmant is None or nmant >= 63:
47-
assert_equal(as_int(v+2), 2**64+2)
46+
nmant = 63 # Unknown precision, let's hope it's at least 63
47+
v = np.longdouble(2) ** (nmant + 1) - 1
48+
assert_equal(as_int(v), 2**(nmant + 1) -1)
49+
# Check for predictable overflow
50+
nexp64 = floor_log2(np.finfo(np.float64).max)
51+
val = np.longdouble(2**nexp64) * 2 # outside float64 range
52+
assert_raises(OverflowError, as_int, val)
53+
assert_raises(OverflowError, as_int, -val)
54+
55+
56+
def test_int_to_float():
57+
# Concert python integer to floating point
58+
# Standard float types just return cast value
59+
for ie3 in IEEE_floats:
60+
nmant = flt2nmant(ie3)
61+
for p in range(nmant + 3):
62+
i = 2**p+1
63+
assert_equal(int_to_float(i, ie3), ie3(i))
64+
assert_equal(int_to_float(-i, ie3), ie3(-i))
65+
# IEEEs in this case are binary formats only
66+
nexp = floor_log2(np.finfo(ie3).max)
67+
# Values too large for the format
68+
smn, smx = -2**(nexp+1), 2**(nexp+1)
69+
if ie3 is np.float64:
70+
assert_raises(OverflowError, int_to_float, smn, ie3)
71+
assert_raises(OverflowError, int_to_float, smx, ie3)
72+
else:
73+
assert_equal(int_to_float(smn, ie3), ie3(smn))
74+
assert_equal(int_to_float(smx, ie3), ie3(smx))
75+
# Longdoubles do better than int, we hope
76+
LD = np.longdouble
77+
# up to integer precision of float64 nmant, we get the same result as for
78+
# casting directly
79+
for p in range(flt2nmant(np.float64)+2): # implicit
80+
i = 2**p-1
81+
assert_equal(int_to_float(i, LD), LD(i))
82+
assert_equal(int_to_float(-i, LD), LD(-i))
83+
# Above max of float64, we're hosed
84+
nexp64 = floor_log2(np.finfo(np.float64).max)
85+
smn64, smx64 = -2**(nexp64+1), 2**(nexp64+1)
86+
# The algorithm here implemented goes through float64, so supermax and
87+
# supermin will cause overflow errors
88+
assert_raises(OverflowError, int_to_float, smn64, LD)
89+
assert_raises(OverflowError, int_to_float, smx64, LD)
90+
try:
91+
nmant = flt2nmant(np.longdouble)
92+
except FloatingError: # don't know where to test
93+
return
94+
# Assuming nmant is greater than that for float64, test we recover precision
95+
i = 2**(nmant+1)-1
96+
assert_equal(as_int(int_to_float(i, LD)), i)
97+
assert_equal(as_int(int_to_float(-i, LD)), -i)
4898

4999

50100
def test_floor_exact_16():

0 commit comments

Comments
 (0)