Skip to content

Commit 6a61579

Browse files
committed
Merge pull request #73 from matthew-brett/cleaner-array-writers
Cleaner array writers A fairly large set of changes to make the conversion between float and int, in particular, safer. Previously it was possible to reach undefined behavior with large floats being cast to int. Some tests, not yet committed, suggest that these changes are more accurate than current trunk in some cases, and not less accurate. Merge to get the changes tested. I will commit the tests soon, and an analysis of when errors would be expected in the old code.
2 parents 6e83d54 + 34b7074 commit 6a61579

14 files changed

+2546
-366
lines changed

doc/source/devel/scaling.rst

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
###########################
2+
Scalefactors and intercepts
3+
###########################
4+
5+
SPM Analyze and nifti1 images have *scalefactors*. nifti1 images also have
6+
*intercepts*. If ``A`` is an array in memory, and ``S`` is the array that will
7+
be written to disk, then::
8+
9+
R = (A - intercept) / scalefactor
10+
11+
and ``R == S`` if ``R`` is already the data dtype we need to write.
12+
13+
If we load the image from disk, we exactly recover ``S`` (and ``R``). To get
14+
something approximating ``A`` (say ``A_prime``) we apply the intercept and
15+
scalefactor::
16+
17+
A_prime = (S * scalefactor) + intercept
18+
19+
In a perfect world ``A`` would be exactly the same as ``A_prime``. However
20+
``scalefactor`` and ``intercept`` are floating point values. With floating
21+
point, if ``r = (a - b) / c; p = (r * c) + b`` it is not necessarily true that
22+
``p == a``. For example:
23+
24+
>>> import numpy as np
25+
>>> a = 10
26+
>>> b = np.e
27+
>>> c = np.pi
28+
>>> r = (a - b) / c
29+
>>> p = (r * c) + b
30+
>>> p == a
31+
False
32+
33+
So there will be some error in this reconstruction, even when ``R`` is the same
34+
type as ``S``.
35+
36+
More common is the situation where ``R`` is a different type from ``S``. If
37+
``R`` is of type ``r_dtype``, ``S`` is of type ``s_dtype`` and
38+
``cast_function(R, dtype)`` is some function that casts ``R`` to the desired
39+
type ``dtype``, then::
40+
41+
R = (A - intercept) / scalefactor
42+
S = cast_function(R, s_dtype)
43+
R_prime = cast_function(S, r_dtype)
44+
A_prime = (R_prime * scalefactor) + intercept
45+
46+
The type of ``R`` will depend on what numpy did for upcasting ``A, intercept,
47+
scalefactor``.
48+
49+
In order that ``cast_function(S, r_dtype)`` can best reverse ``cast_function(R,
50+
s_dtype)``, it needs to know the type of ``R``, which is not stored. The
51+
type of ``R`` depends on the types of ``A`` and of ``intercept, scalefactor``.
52+
We don't know the type of ``A`` because it is not stored.
53+
54+
``R`` is likely to be a floating point type because of the application of
55+
scalefactor and intercept. If ``(intercept, scalefactor)`` are not the identity
56+
(0, 1), then we can ensure that ``R`` is at minimum the type of the ``intercept,
57+
scalefactor`` by making these be at least 1D arrays, so that floating point
58+
types will upcast in ``R = (A - intercept) / scalefactor``.
59+
60+
The cast of ``R`` to ``S`` and back to ``R_prime`` can lose resolution if the
61+
types of ``R`` and ``S`` have different resolution.
62+
63+
Our job is to select:
64+
65+
* scalefactor
66+
* intercept
67+
* ``cast_function``
68+
69+
such that we minimize some measure of difference between ``A`` and
70+
``A_prime``.

nibabel/analyze.py

Lines changed: 35 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,14 @@
8080
can be loaded with and without a default flip, so the saved zoom will not
8181
constrain the affine.
8282
'''
83+
import sys
84+
8385
import numpy as np
8486

85-
from .volumeutils import (native_code, swapped_code, make_dt_codes,
86-
calculate_scale, allopen, shape_zoom_affine,
87-
array_to_file, array_from_file, can_cast,
88-
floating_point_types)
87+
from .volumeutils import (native_code, swapped_code, make_dt_codes, allopen,
88+
shape_zoom_affine, array_from_file, seek_tell,
89+
apply_read_scaling)
90+
from .arraywriters import make_array_writer, get_slope_inter, WriterError
8991
from .wrapstruct import WrapStruct
9092
from .spatialimages import (HeaderDataError, HeaderTypeError,
9193
SpatialImage)
@@ -484,24 +486,10 @@ def data_from_fileobj(self, fileobj):
484486
data = self.raw_data_from_fileobj(fileobj)
485487
# get scalings from header. Value of None means not present in header
486488
slope, inter = self.get_slope_inter()
487-
if slope is None or (slope==1.0 and (inter is None or inter == 0)):
488-
return data
489-
# in-place multiplication and addition on integer types leads to
490-
# integer output types, and disastrous integer rounding.
491-
# We'd like to do inplace if we can, to save memory
492-
is_flt = data.dtype.type in floating_point_types
493-
if slope != 1.0:
494-
if is_flt:
495-
data *= slope
496-
else:
497-
data = data * slope
498-
is_flt = True
499-
if inter:
500-
if is_flt:
501-
data += inter
502-
else:
503-
data = data + inter
504-
return data
489+
slope = 1.0 if slope is None else slope
490+
inter = 0.0 if inter is None else inter
491+
# Upcast as necessary for big slopes, intercepts
492+
return apply_read_scaling(data, slope, inter)
505493

506494
def data_to_fileobj(self, data, fileobj):
507495
''' Write `data` to `fileobj`, maybe modifying `self`
@@ -531,23 +519,23 @@ def data_to_fileobj(self, data, fileobj):
531519
>>> data.astype(np.float64).tostring('F') == str_io.getvalue()
532520
True
533521
'''
534-
data = np.asarray(data)
535-
slope, inter, mn, mx = self.scaling_from_data(data)
522+
data = np.asanyarray(data)
536523
shape = self.get_data_shape()
537524
if data.shape != shape:
538525
raise HeaderDataError('Data should be shape (%s)' %
539526
', '.join(str(s) for s in shape))
540-
offset = self.get_data_offset()
541527
out_dtype = self.get_data_dtype()
542-
array_to_file(data,
543-
fileobj,
544-
out_dtype,
545-
offset,
546-
inter,
547-
slope,
548-
mn,
549-
mx)
550-
self.set_slope_inter(slope, inter)
528+
try:
529+
arr_writer = make_array_writer(data,
530+
out_dtype,
531+
self.has_data_slope,
532+
self.has_data_intercept)
533+
except WriterError:
534+
msg = sys.exc_info()[1] # python 2 / 3 compatibility
535+
raise HeaderTypeError(msg)
536+
seek_tell(fileobj, self.get_data_offset())
537+
arr_writer.to_fileobj(fileobj)
538+
self.set_slope_inter(*get_slope_inter(arr_writer))
551539

552540
def get_data_dtype(self):
553541
''' Get numpy dtype for data
@@ -761,45 +749,6 @@ def set_slope_inter(self, slope, inter=None):
761749
raise HeaderTypeError('Cannot set slope != 1 or intercept != 0 '
762750
'for Analyze headers')
763751

764-
def scaling_from_data(self, data):
765-
''' Calculate slope, intercept, min, max from data given header
766-
767-
Check that the data can be sensibly adapted to this header data
768-
dtype. If the header type does support useful scaling to allow
769-
this, raise a HeaderTypeError.
770-
771-
Parameters
772-
----------
773-
data : array-like
774-
array of data for which to calculate scaling etc
775-
776-
Returns
777-
-------
778-
divslope : None or scalar
779-
divisor for data, after subtracting intercept. If None, then
780-
there are no valid data
781-
intercept : None or scalar
782-
number to subtract from data before writing.
783-
mn : None or scalar
784-
data minimum to write, None means use data minimum
785-
mx : None or scalar
786-
data maximum to write, None means use data maximum
787-
'''
788-
data = np.asarray(data)
789-
out_dtype = self.get_data_dtype()
790-
if not can_cast(data.dtype.type,
791-
out_dtype.type,
792-
self.has_data_intercept,
793-
self.has_data_slope):
794-
raise HeaderTypeError('Cannot cast data to header dtype without'
795-
' large potential loss in precision')
796-
if not self.has_data_slope:
797-
return 1.0, 0.0, None, None
798-
return calculate_scale(
799-
data,
800-
out_dtype,
801-
self.has_data_intercept)
802-
803752
@classmethod
804753
def _get_checks(klass):
805754
''' Return sequence of check functions for this class '''
@@ -961,41 +910,6 @@ def _write_header(self, header_file, header, slope, inter):
961910
header.set_slope_inter(slope, inter)
962911
header.write_to(header_file)
963912

964-
def _write_image(self, image_file, data, header, slope, inter, mn, mx):
965-
''' Utility routine to write image
966-
967-
Parameters
968-
----------
969-
image_file : file-like
970-
file-like object implementing ``seek`` or ``tell``, and
971-
``write``
972-
data : array-like
973-
array to write
974-
header : analyze-type header object
975-
header
976-
slope : None or float
977-
scale factor for `data` so that written data is ``data /
978-
slope + inter``. None means no valid data
979-
inter : float
980-
intercept (see above)
981-
mn : None or float
982-
minimum to scale data to. None means use data minimum
983-
max : None or float
984-
maximum to scale data to. None means use data maximum
985-
986-
Returns
987-
-------
988-
None
989-
'''
990-
shape = header.get_data_shape()
991-
if data.shape != shape:
992-
raise HeaderDataError('Data should be shape (%s)' %
993-
', '.join(str(s) for s in shape))
994-
offset = header.get_data_offset()
995-
out_dtype = header.get_data_dtype()
996-
array_to_file(data, image_file, out_dtype, offset,
997-
inter, slope, mn, mx)
998-
999913
def to_file_map(self, file_map=None):
1000914
''' Write image to `file_map` or contained ``self.file_map``
1001915
@@ -1010,7 +924,11 @@ def to_file_map(self, file_map=None):
1010924
data = self.get_data()
1011925
self.update_header()
1012926
hdr = self.get_header()
1013-
slope, inter, mn, mx = hdr.scaling_from_data(data)
927+
out_dtype = self.get_data_dtype()
928+
arr_writer = make_array_writer(data,
929+
out_dtype,
930+
hdr.has_data_slope,
931+
hdr.has_data_intercept)
1014932
hdr_fh, img_fh = self._get_fileholders(file_map)
1015933
# Check if hdr and img refer to same file; this can happen with odd
1016934
# analyze images but most often this is because it's a single nifti file
@@ -1020,8 +938,15 @@ def to_file_map(self, file_map=None):
1020938
imgf = hdrf
1021939
else:
1022940
imgf = img_fh.get_prepare_fileobj(mode='wb')
941+
slope, inter = get_slope_inter(arr_writer)
1023942
self._write_header(hdrf, hdr, slope, inter)
1024-
self._write_image(imgf, data, hdr, slope, inter, mn, mx)
943+
# Write image
944+
shape = hdr.get_data_shape()
945+
if data.shape != shape:
946+
raise HeaderDataError('Data should be shape (%s)' %
947+
', '.join(str(s) for s in shape))
948+
seek_tell(imgf, hdr.get_data_offset())
949+
arr_writer.to_fileobj(imgf)
1025950
if hdr_fh.fileobj is None: # was filename
1026951
hdrf.close()
1027952
if not hdr_img_same:

0 commit comments

Comments
 (0)