Merge pull request #81 from matthew-brett/float-128-cast-fix

matthew-brett · matthew-brett · commit 627ecd497172 · 2012-01-17T02:08:50.000-08:00
Float 128 cast fix for platform(s) with actual IEEE binary128

I hadn't previously considered precision that high.
diff --git a/nibabel/casting.py b/nibabel/casting.py
@@ -192,8 +192,8 @@ def as_int(x, check=True):
     This is useful because the numpy int(val) mechanism is broken for large
     values in np.longdouble.
 
-    This routine will still break for values that are outside the range of
-    float64.
+    This routine will still raise an OverflowError for values that are outside
+    the range of float64.
 
     Parameters
     ----------
@@ -220,24 +220,31 @@ def as_int(x, check=True):
     >>> as_int(2.1, check=False)
     2
     """
-    ix = int(x)
-    if ix == x:
-        return ix
+    x = np.array(x, copy=True)
     fx = np.floor(x)
     if check and fx != x:
         raise FloatingError('Not an integer: %s' % x)
-    f64 = np.float64(fx)
-    i64 = int(f64)
-    assert f64 == i64
-    res = fx - f64
-    return ix + int(res)
+    if not fx.dtype.type == np.longdouble:
+        return int(x)
+    # Subtract float64 chunks until we have all of the number. If the int is too
+    # large, it will overflow
+    ret = 0
+    while fx != 0:
+        f64 = np.float64(fx)
+        fx -= f64
+        ret += int(f64)
+    return ret
 
 
 def int_to_float(val, flt_type):
     """ Convert integer `val` to floating point type `flt_type`
 
-    Useful because casting to ``np.longdouble`` loses precision as it appears to
-    go through casting to np.float64.
+    Why is this so complicated?
+
+    At least in numpy <= 1.6.1, numpy longdoubles do not correctly convert to
+    ints, and ints do not correctly convert to longdoubles.  Specifically, in
+    both cases, the values seem to go through float64 conversion on the way, so
+    to convert better, we need to split into float64s and sum up the result.
 
     Parameters
     ----------
@@ -253,9 +260,12 @@ def int_to_float(val, flt_type):
     """
     if not flt_type is np.longdouble:
         return flt_type(val)
-    f64 = np.float64(val)
-    res = val - int(f64)
-    return np.longdouble(f64) + np.longdouble(res)
+    faval = np.longdouble(0)
+    while val != 0:
+        f64 = np.float64(val)
+        faval += f64
+        val -= int(f64)
+    return faval
 
 
 def floor_exact(val, flt_type):
@@ -299,33 +309,15 @@ def floor_exact(val, flt_type):
     flt_type = np.dtype(flt_type).type
     sign = val > 0 and 1 or -1
     aval = abs(val)
-    if flt_type is np.longdouble:
-        # longdouble seems to go through casting to float64, so getting the
-        # value into float128 with the given precision needs to go through two
-        # steps, first float64, then adding the remainder.
-        f64 = floor_exact(aval, np.float64)
-        i64 = int(f64)
-        assert f64 == i64
-        res = aval - i64
-        try:
-            faval = flt_type(i64) + flt_type(res)
-        except OverflowError:
-            faval = np.inf
-        if faval == np.inf:
-            return sign * np.finfo(flt_type).max
-        if (faval - f64) <= res:
-            # Float casting has made the value go down or stay the same
-            return sign * faval
-    else: # Normal case
-        try:
-            faval = flt_type(aval)
-        except OverflowError:
-            faval = np.inf
-        if faval == np.inf:
-            return sign * np.finfo(flt_type).max
-        if int(faval) <= aval:
-            # Float casting has made the value go down or stay the same
-            return sign * faval
+    try: # int_to_float deals with longdouble safely
+        faval = int_to_float(aval, flt_type)
+    except OverflowError:
+        faval = np.inf
+    if faval == np.inf:
+        return sign * np.finfo(flt_type).max
+    if as_int(faval) <= aval: # as_int deals with longdouble safely
+        # Float casting has made the value go down or stay the same
+        return sign * faval
     # Float casting made the value go up
     nmant = flt2nmant(flt_type)
     biggest_gap = 2**(floor_log2(aval) - nmant)
diff --git a/nibabel/tests/test_floating.py b/nibabel/tests/test_floating.py
@@ -2,7 +2,8 @@
 """
 import numpy as np
 
-from ..casting import floor_exact, flt2nmant, as_int, FloatingError
+from ..casting import (floor_exact, flt2nmant, as_int, FloatingError,
+                       int_to_float, floor_log2)
 
 from nose import SkipTest
 from nose.tools import assert_equal, assert_raises
@@ -36,15 +37,64 @@ def test_as_int():
     assert_equal(as_int(-2.1, False), -2)
     v = np.longdouble(2**64)
     assert_equal(as_int(v), 2**64)
-    # Have all long doubles got this precision?  Windows 32-bit longdouble
-    # appears to have 52 bit precision, but we avoid that by checking for known
-    # precisions that are less than that required
+    # Have all long doubles got 63+1 binary bits of precision?  Windows 32-bit
+    # longdouble appears to have 52 bit precision, but we avoid that by checking
+    # for known precisions that are less than that required
     try:
         nmant = flt2nmant(np.longdouble)
     except FloatingError:
-        nmant = None # Unknown precision, test and hope
-    if nmant is None or nmant >= 63:
-        assert_equal(as_int(v+2), 2**64+2)
+        nmant = 63 # Unknown precision, let's hope it's at least 63
+    v = np.longdouble(2) ** (nmant + 1) - 1
+    assert_equal(as_int(v), 2**(nmant + 1) -1)
+    # Check for predictable overflow
+    nexp64 = floor_log2(np.finfo(np.float64).max)
+    val = np.longdouble(2**nexp64) * 2 # outside float64 range
+    assert_raises(OverflowError, as_int, val)
+    assert_raises(OverflowError, as_int, -val)
+
+
+def test_int_to_float():
+    # Convert python integer to floating point
+    # Standard float types just return cast value
+    for ie3 in IEEE_floats:
+        nmant = flt2nmant(ie3)
+        for p in range(nmant + 3):
+            i = 2**p+1
+            assert_equal(int_to_float(i, ie3), ie3(i))
+            assert_equal(int_to_float(-i, ie3), ie3(-i))
+        # IEEEs in this case are binary formats only
+        nexp = floor_log2(np.finfo(ie3).max)
+        # Values too large for the format
+        smn, smx = -2**(nexp+1), 2**(nexp+1)
+        if ie3 is np.float64:
+            assert_raises(OverflowError, int_to_float, smn, ie3)
+            assert_raises(OverflowError, int_to_float, smx, ie3)
+        else:
+            assert_equal(int_to_float(smn, ie3), ie3(smn))
+            assert_equal(int_to_float(smx, ie3), ie3(smx))
+    # Longdoubles do better than int, we hope
+    LD = np.longdouble
+    # up to integer precision of float64 nmant, we get the same result as for
+    # casting directly
+    for p in range(flt2nmant(np.float64)+2): # implicit
+        i = 2**p-1
+        assert_equal(int_to_float(i, LD), LD(i))
+        assert_equal(int_to_float(-i, LD), LD(-i))
+    # Above max of float64, we're hosed
+    nexp64 = floor_log2(np.finfo(np.float64).max)
+    smn64, smx64 = -2**(nexp64+1), 2**(nexp64+1)
+    # The algorithm implemented here goes through float64, so supermax and
+    # supermin will cause overflow errors
+    assert_raises(OverflowError, int_to_float, smn64, LD)
+    assert_raises(OverflowError, int_to_float, smx64, LD)
+    try:
+        nmant = flt2nmant(np.longdouble)
+    except FloatingError: # don't know where to test
+        return
+    # Assuming nmant is greater than that for float64, test we recover precision
+    i = 2**(nmant+1)-1
+    assert_equal(as_int(int_to_float(i, LD)), i)
+    assert_equal(as_int(int_to_float(-i, LD)), -i)
 
 
 def test_floor_exact_16():