Skip to content

Commit 6a61579

Browse files
committed
Merge pull request #73 from matthew-brett/cleaner-array-writers
Cleaner array writers A fairly large set of changes to make the conversion between float and int, in particular, safer. Previously it was possible to reach undefined behavior with large floats being cast to int. Some tests, not yet committed, suggest that these changes are more accurate than current trunk in some cases, and not less accurate. Merge to get the changes tested. I will commit the tests soon, and an analysis of when errors would be expected in the old code.
2 parents 6e83d54 + 34b7074 commit 6a61579

14 files changed

+2546
-366
lines changed

doc/source/devel/scaling.rst

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
###########################
2+
Scalefactors and intercepts
3+
###########################
4+
5+
SPM Analyze and nifti1 images have *scalefactors*. nifti1 images also have
6+
*intercepts*. If ``A`` is an array in memory, and ``S`` is the array that will
7+
be written to disk, then::
8+
9+
R = (A - intercept) / scalefactor
10+
11+
and ``R == S`` if ``R`` is already the data dtype we need to write.
12+
13+
If we load the image from disk, we exactly recover ``S`` (and ``R``). To get
14+
something approximating ``A`` (say ``A_prime``) we apply the intercept and
15+
scalefactor::
16+
17+
A_prime = (S * scalefactor) + intercept
18+
19+
In a perfect world ``A`` would be exactly the same as ``A_prime``. However
20+
``scalefactor`` and ``intercept`` are floating point values. With floating
21+
point, if ``r = (a - b) / c; p = (r * c) + b`` it is not necessarily true that
22+
``p == a``. For example:
23+
24+
>>> import numpy as np
25+
>>> a = 10
26+
>>> b = np.e
27+
>>> c = np.pi
28+
>>> r = (a - b) / c
29+
>>> p = (r * c) + b
30+
>>> p == a
31+
False
32+
33+
So there will be some error in this reconstruction, even when ``R`` is the same
34+
type as ``S``.
35+
36+
More common is the situation where ``R`` is a different type from ``S``. If
37+
``R`` is of type ``r_dtype``, ``S`` is of type ``s_dtype`` and
38+
``cast_function(R, dtype)`` is some function that casts ``R`` to the desired
39+
type ``dtype``, then::
40+
41+
R = (A - intercept) / scalefactor
42+
S = cast_function(R, s_dtype)
43+
R_prime = cast_function(S, r_dtype)
44+
A_prime = (R_prime * scalefactor) + intercept
45+
46+
The type of ``R`` will depend on what numpy did for upcasting ``A, intercept,
47+
scalefactor``.
48+
49+
In order that ``cast_function(S, r_dtype)`` can best reverse ``cast_function(R,
50+
s_dtype)``, it needs to know the type of ``R``, which is not stored. The
51+
type of ``R`` depends on the types of ``A`` and of ``intercept, scalefactor``.
52+
We don't know the type of ``A`` because it is not stored.
53+
54+
``R`` is likely to be a floating point type because of the application of
55+
scalefactor and intercept. If ``(intercept, scalefactor)`` are not the identity
56+
(0, 1), then we can ensure that ``R`` is at minimum the type of the ``intercept,
57+
scalefactor`` by making these be at least 1D arrays, so that floating point
58+
types will upcast in ``R = (A - intercept) / scalefactor``.
59+
60+
The cast of ``R`` to ``S`` and back to ``R_prime`` can lose resolution if the
61+
types of ``R`` and ``S`` have different resolution.
62+
63+
Our job is to select:
64+
65+
* scalefactor
66+
* intercept
67+
* ``cast_function``
68+
69+
such that we minimize some measure of difference between ``A`` and
70+
``A_prime``.

nibabel/analyze.py

Lines changed: 35 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,14 @@
8080
can be loaded with and without a default flip, so the saved zoom will not
8181
constrain the affine.
8282
'''
83+
import sys
84+
8385
import numpy as np
8486

85-
from .volumeutils import (native_code, swapped_code, make_dt_codes,
86-
calculate_scale, allopen, shape_zoom_affine,
87-
array_to_file, array_from_file, can_cast,
88-
floating_point_types)
87+
from .volumeutils import (native_code, swapped_code, make_dt_codes, allopen,
88+
shape_zoom_affine, array_from_file, seek_tell,
89+
apply_read_scaling)
90+
from .arraywriters import make_array_writer, get_slope_inter, WriterError
8991
from .wrapstruct import WrapStruct
9092
from .spatialimages import (HeaderDataError, HeaderTypeError,
9193
SpatialImage)
@@ -484,24 +486,10 @@ def data_from_fileobj(self, fileobj):
484486
data = self.raw_data_from_fileobj(fileobj)
485487
# get scalings from header. Value of None means not present in header
486488
slope, inter = self.get_slope_inter()
487-
if slope is None or (slope==1.0 and (inter is None or inter == 0)):
488-
return data
489-
# in-place multiplication and addition on integer types leads to
490-
# integer output types, and disastrous integer rounding.
491-
# We'd like to do inplace if we can, to save memory
492-
is_flt = data.dtype.type in floating_point_types
493-
if slope != 1.0:
494-
if is_flt:
495-
data *= slope
496-
else:
497-
data = data * slope
498-
is_flt = True
499-
if inter:
500-
if is_flt:
501-
data += inter
502-
else:
503-
data = data + inter
504-
return data
489+
slope = 1.0 if slope is None else slope
490+
inter = 0.0 if inter is None else inter
491+
# Upcast as necessary for big slopes, intercepts
492+
return apply_read_scaling(data, slope, inter)
505493

506494
def data_to_fileobj(self, data, fileobj):
507495
''' Write `data` to `fileobj`, maybe modifying `self`
@@ -531,23 +519,23 @@ def data_to_fileobj(self, data, fileobj):
531519
>>> data.astype(np.float64).tostring('F') == str_io.getvalue()
532520
True
533521
'''
534-
data = np.asarray(data)
535-
slope, inter, mn, mx = self.scaling_from_data(data)
522+
data = np.asanyarray(data)
536523
shape = self.get_data_shape()
537524
if data.shape != shape:
538525
raise HeaderDataError('Data should be shape (%s)' %
539526
', '.join(str(s) for s in shape))
540-
offset = self.get_data_offset()
541527
out_dtype = self.get_data_dtype()
542-
array_to_file(data,
543-
fileobj,
544-
out_dtype,
545-
offset,
546-
inter,
547-
slope,
548-
mn,
549-
mx)
550-
self.set_slope_inter(slope, inter)
528+
try:
529+
arr_writer = make_array_writer(data,
530+
out_dtype,
531+
self.has_data_slope,
532+
self.has_data_intercept)
533+
except WriterError:
534+
msg = sys.exc_info()[1] # python 2 / 3 compatibility
535+
raise HeaderTypeError(msg)
536+
seek_tell(fileobj, self.get_data_offset())
537+
arr_writer.to_fileobj(fileobj)
538+
self.set_slope_inter(*get_slope_inter(arr_writer))
551539

552540
def get_data_dtype(self):
553541
''' Get numpy dtype for data
@@ -761,45 +749,6 @@ def set_slope_inter(self, slope, inter=None):
761749
raise HeaderTypeError('Cannot set slope != 1 or intercept != 0 '
762750
'for Analyze headers')
763751

764-
def scaling_from_data(self, data):
765-
''' Calculate slope, intercept, min, max from data given header
766-
767-
Check that the data can be sensibly adapted to this header data
768-
dtype. If the header type does support useful scaling to allow
769-
this, raise a HeaderTypeError.
770-
771-
Parameters
772-
----------
773-
data : array-like
774-
array of data for which to calculate scaling etc
775-
776-
Returns
777-
-------
778-
divslope : None or scalar
779-
divisor for data, after subtracting intercept. If None, then
780-
there are no valid data
781-
intercept : None or scalar
782-
number to subtract from data before writing.
783-
mn : None or scalar
784-
data minimum to write, None means use data minimum
785-
mx : None or scalar
786-
data maximum to write, None means use data maximum
787-
'''
788-
data = np.asarray(data)
789-
out_dtype = self.get_data_dtype()
790-
if not can_cast(data.dtype.type,
791-
out_dtype.type,
792-
self.has_data_intercept,
793-
self.has_data_slope):
794-
raise HeaderTypeError('Cannot cast data to header dtype without'
795-
' large potential loss in precision')
796-
if not self.has_data_slope:
797-
return 1.0, 0.0, None, None
798-
return calculate_scale(
799-
data,
800-
out_dtype,
801-
self.has_data_intercept)
802-
803752
@classmethod
804753
def _get_checks(klass):
805754
''' Return sequence of check functions for this class '''
@@ -961,41 +910,6 @@ def _write_header(self, header_file, header, slope, inter):
961910
header.set_slope_inter(slope, inter)
962911
header.write_to(header_file)
963912

964-
def _write_image(self, image_file, data, header, slope, inter, mn, mx):
965-
''' Utility routine to write image
966-
967-
Parameters
968-
----------
969-
image_file : file-like
970-
file-like object implementing ``seek`` or ``tell``, and
971-
``write``
972-
data : array-like
973-
array to write
974-
header : analyze-type header object
975-
header
976-
slope : None or float
977-
scale factor for `data` so that written data is ``data /
978-
slope + inter``. None means no valid data
979-
inter : float
980-
intercept (see above)
981-
mn : None or float
982-
minimum to scale data to. None means use data minimum
983-
max : None or float
984-
maximum to scale data to. None means use data maximum
985-
986-
Returns
987-
-------
988-
None
989-
'''
990-
shape = header.get_data_shape()
991-
if data.shape != shape:
992-
raise HeaderDataError('Data should be shape (%s)' %
993-
', '.join(str(s) for s in shape))
994-
offset = header.get_data_offset()
995-
out_dtype = header.get_data_dtype()
996-
array_to_file(data, image_file, out_dtype, offset,
997-
inter, slope, mn, mx)
998-
999913
def to_file_map(self, file_map=None):
1000914
''' Write image to `file_map` or contained ``self.file_map``
1001915
@@ -1010,7 +924,11 @@ def to_file_map(self, file_map=None):
1010924
data = self.get_data()
1011925
self.update_header()
1012926
hdr = self.get_header()
1013-
slope, inter, mn, mx = hdr.scaling_from_data(data)
927+
out_dtype = self.get_data_dtype()
928+
arr_writer = make_array_writer(data,
929+
out_dtype,
930+
hdr.has_data_slope,
931+
hdr.has_data_intercept)
1014932
hdr_fh, img_fh = self._get_fileholders(file_map)
1015933
# Check if hdr and img refer to same file; this can happen with odd
1016934
# analyze images but most often this is because it's a single nifti file
@@ -1020,8 +938,15 @@ def to_file_map(self, file_map=None):
1020938
imgf = hdrf
1021939
else:
1022940
imgf = img_fh.get_prepare_fileobj(mode='wb')
941+
slope, inter = get_slope_inter(arr_writer)
1023942
self._write_header(hdrf, hdr, slope, inter)
1024-
self._write_image(imgf, data, hdr, slope, inter, mn, mx)
943+
# Write image
944+
shape = hdr.get_data_shape()
945+
if data.shape != shape:
946+
raise HeaderDataError('Data should be shape (%s)' %
947+
', '.join(str(s) for s in shape))
948+
seek_tell(imgf, hdr.get_data_offset())
949+
arr_writer.to_fileobj(imgf)
1025950
if hdr_fh.fileobj is None: # was filename
1026951
hdrf.close()
1027952
if not hdr_img_same:

0 commit comments

Comments
 (0)