BF+TST - scaling_needed more robust, with tests

matthew-brett · matthew-brett · commit 2a0a4a810eea · 2012-02-17T21:25:03.000-08:00
The ``scaling_needed`` routine hit a numpy bug in the ``can_cast`` function
for structured dtypes, and inexact int -> float conversions when comparing
ranges for large uints.  Both were found with the added tests and fixed.
diff --git a/nibabel/arraywriters.py b/nibabel/arraywriters.py
@@ -90,27 +90,40 @@ def scaling_needed(self):
         data = self._array
         arr_dtype = data.dtype
         out_dtype = self._out_dtype
-        if np.can_cast(arr_dtype, out_dtype):
-            return False
+        # There's a bug in np.can_cast (at least up to and including 1.6.1) such
+        # that any structured output type passes.  Check for this first.
         if 'V' in (arr_dtype.kind, out_dtype.kind):
+            if arr_dtype == out_dtype:
+                return False
             raise WriterError('Cannot cast to or from non-numeric types')
+        if np.can_cast(arr_dtype, out_dtype):
+            return False
+        # Direct casting for complex output from any numeric type
         if out_dtype.kind == 'c':
             return False
         if arr_dtype.kind == 'c':
             raise WriterError('Cannot cast complex types to non-complex')
+        # Direct casting for float output from any non-complex numeric type
         if out_dtype.kind == 'f':
             return False
         # Now we need to look at the data for special cases
         mn, mx = self.finite_range() # this is cached
         if (mn, mx) in ((0, 0), (np.inf, -np.inf)):
             # Data all zero, or no data is finite
             return False
+        # Floats -> (u)ints always need scaling
         if arr_dtype.kind == 'f':
             return True
+        # (u)int input, (u)int output
         assert arr_dtype.kind in 'iu' and out_dtype.kind in 'iu'
         info = np.iinfo(out_dtype)
-        if mn >= info.min and mx <= info.max:
-                return False
+        # No scaling needed if data already fits in output type
+        # But note - we need to convert to ints, to avoid conversion to float
+        # during comparisons, and therefore int -> float conversions which are
+        # not exact.  Only a problem for uint64 though.  We need as_int here to
+        # work around a numpy 1.4.1 bug in uint conversion
+        if as_int(mn) >= as_int(info.min) and as_int(mx) <= as_int(info.max):
+            return False
         return True
 
     @property
diff --git a/nibabel/tests/test_arraywriters.py b/nibabel/tests/test_arraywriters.py
@@ -97,6 +97,83 @@ def test_arraywriters():
             assert_true(arr_back.flags.c_contiguous)
 
 
+def test_scaling_needed():
+    # Structured types return False if dtypes same, raise error otherwise
+    dt_def = [('f', 'i4')]
+    arr = np.ones(10, dt_def)
+    for t in NUMERIC_TYPES:
+        assert_raises(WriterError, ArrayWriter, arr, t)
+        narr = np.ones(10, t)
+        assert_raises(WriterError, ArrayWriter, narr, dt_def)
+    assert_false(ArrayWriter(arr).scaling_needed())
+    assert_false(ArrayWriter(arr, dt_def).scaling_needed())
+    # Any numeric type that can cast, needs no scaling
+    for in_t in NUMERIC_TYPES:
+        for out_t in NUMERIC_TYPES:
+            if np.can_cast(in_t, out_t):
+                aw = ArrayWriter(np.ones(10, in_t), out_t)
+                assert_false(aw.scaling_needed())
+    for in_t in NUMERIC_TYPES:
+        # Numeric types to complex never need scaling
+        arr = np.ones(10, in_t)
+        for out_t in COMPLEX_TYPES:
+            assert_false(ArrayWriter(arr, out_t).scaling_needed())
+    # Attempts to cast from complex to any non-complex type raise an error
+    for in_t in COMPLEX_TYPES:
+        for out_t in FLOAT_TYPES + IUINT_TYPES:
+            arr = np.ones(10, in_t)
+            assert_raises(WriterError, ArrayWriter, arr, out_t)
+    # Casting from any non-complex numeric type to floats needs no scaling
+    for in_t in FLOAT_TYPES + IUINT_TYPES:
+        arr = np.ones(10, in_t)
+        for out_t in FLOAT_TYPES:
+            assert_false(ArrayWriter(arr, out_t).scaling_needed())
+    # For (u)int output types, all-zero or empty arrays don't need scaling
+    for in_t in FLOAT_TYPES + IUINT_TYPES:
+        arr_0 = np.zeros(10, in_t)
+        arr_e = []
+        for out_t in IUINT_TYPES:
+            assert_false(ArrayWriter(arr_0, out_t).scaling_needed())
+            assert_false(ArrayWriter(arr_e, out_t).scaling_needed())
+    # Going to (u)ints, non-finite arrays don't need scaling
+    for in_t in FLOAT_TYPES:
+        arr_nan = np.zeros(10, in_t) + np.nan
+        arr_inf = np.zeros(10, in_t) + np.inf
+        arr_minf = np.zeros(10, in_t) - np.inf
+        arr_mix = np.array([np.nan, np.inf, -np.inf], dtype=in_t)
+        for out_t in IUINT_TYPES:
+            for arr in (arr_nan, arr_inf, arr_minf, arr_mix):
+                assert_false(ArrayWriter(arr, out_t).scaling_needed())
+    # Floats as input always need scaling
+    for in_t in FLOAT_TYPES:
+        arr = np.ones(10, in_t)
+        for out_t in IUINT_TYPES:
+            # We need an arraywriter that will tolerate construction when
+            # scaling is needed
+            assert_true(SlopeArrayWriter(arr, out_t).scaling_needed())
+    # in-range (u)ints don't need scaling
+    for in_t in IUINT_TYPES:
+        in_info = np.iinfo(in_t)
+        in_min, in_max = in_info.min, in_info.max
+        for out_t in IUINT_TYPES:
+            out_info = np.iinfo(out_t)
+            out_min, out_max = out_info.min, out_info.max
+            if in_min >= out_min and in_max <= out_max:
+                arr = np.array([in_min, in_max], in_t)
+                assert_true(np.can_cast(arr.dtype, out_t))
+                # We've already tested this with can_cast above, but...
+                assert_false(ArrayWriter(arr, out_t).scaling_needed())
+                continue
+            # The output data type does not include the input data range
+            max_min = max(in_min, out_min) # 0 for input or output uint
+            min_max = min(in_max, out_max)
+            arr = np.array([max_min, min_max], in_t)
+            assert_false(ArrayWriter(arr, out_t).scaling_needed())
+            assert_true(SlopeInterArrayWriter(arr + 1, out_t).scaling_needed())
+            if in_t in INT_TYPES:
+                assert_true(SlopeInterArrayWriter(arr - 1, out_t).scaling_needed())
+
+
 def test_special_rt():
     # Test that zeros; none finite - round trip to zeros
     for arr in (np.array([np.inf, np.nan, -np.inf]),