From ccd745347840f147a480cbdf9351df83db69c14d Mon Sep 17 00:00:00 2001
From: "Christopher J. Markiewicz"
Date: Tue, 19 Nov 2019 09:50:26 -0500
Subject: [PATCH 1/7] FIX: Accept dtype parameter to ArrayProxy.__array__

---
 nibabel/arrayproxy.py | 7 +++++--
 nibabel/ecat.py       | 5 +++--
 nibabel/minc1.py      | 7 +++++--
 nibabel/parrec.py     | 7 +++++--
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py
index b8313e8ae4..d7d96b24fb 100644
--- a/nibabel/arrayproxy.py
+++ b/nibabel/arrayproxy.py
@@ -405,8 +405,11 @@ def get_scaled(self, dtype=None):
         """
         return self._get_scaled(dtype=dtype, slicer=())
 
-    def __array__(self):
-        return self._get_scaled(dtype=None, slicer=())
+    def __array__(self, dtype=None):
+        arr = self._get_scaled(dtype=dtype, slicer=())
+        if dtype is not None:
+            arr = arr.astype(dtype, copy=False)
+        return arr
 
     def __getitem__(self, slicer):
         return self._get_scaled(dtype=None, slicer=slicer)
diff --git a/nibabel/ecat.py b/nibabel/ecat.py
index a0923f0753..7686a8098b 100644
--- a/nibabel/ecat.py
+++ b/nibabel/ecat.py
@@ -689,7 +689,7 @@ def ndim(self):
     def is_proxy(self):
         return True
 
-    def __array__(self):
+    def __array__(self, dtype=None):
         ''' Read of data from file
 
         This reads ALL FRAMES into one array, can be memory expensive.
@@ -697,7 +697,8 @@ def __array__(self):
         If you want to read only some slices, use the slicing syntax
         (``__getitem__``) below, or ``subheader.data_from_fileobj(frame)``
         '''
-        data = np.empty(self.shape)
+        # dtype=None is interpreted as float64
+        data = np.empty(self.shape, dtype=dtype)
         frame_mapping = get_frame_order(self._subheader._mlist)
         for i in sorted(frame_mapping):
             data[:, :, :, i] = self._subheader.data_from_fileobj(
                 frame_mapping[i][0])
         return data
diff --git a/nibabel/minc1.py b/nibabel/minc1.py
index c41af5c454..2c54be841e 100644
--- a/nibabel/minc1.py
+++ b/nibabel/minc1.py
@@ -290,9 +290,12 @@ def get_scaled(self, dtype=None):
         """
         return self._get_scaled(dtype=dtype, slicer=())
 
-    def __array__(self):
+    def __array__(self, dtype=None):
         ''' Read of data from file '''
-        return self._get_scaled(dtype=None, slicer=())
+        arr = self._get_scaled(dtype=dtype, slicer=())
+        if dtype is not None:
+            arr = arr.astype(dtype, copy=False)
+        return arr
 
     def __getitem__(self, sliceobj):
         """ Read slice `sliceobj` of data from file """
diff --git a/nibabel/parrec.py b/nibabel/parrec.py
index 59eac79809..5e26f67e0a 100644
--- a/nibabel/parrec.py
+++ b/nibabel/parrec.py
@@ -699,8 +699,11 @@ def get_scaled(self, dtype=None):
         """
         return self._get_scaled(dtype=dtype, slicer=())
 
-    def __array__(self):
-        return self._get_scaled(dtype=None, slicer=())
+    def __array__(self, dtype=None):
+        arr = self._get_scaled(dtype=dtype, slicer=())
+        if dtype is not None:
+            arr = arr.astype(dtype, copy=False)
+        return arr
 
     def __getitem__(self, slicer):
         return self._get_scaled(dtype=None, slicer=slicer)
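
The user-visible effect of this first patch is that a ``dtype`` passed through NumPy's conversion functions now reaches the proxy, so scaling can be confined to (or cast down to) the requested precision rather than always handing back the proxy's default-precision array for the caller to cast. A minimal usage sketch, assuming any nibabel-readable image on disk (the filename below is a placeholder, not part of the patch):

```python
import numpy as np
import nibabel as nib

img = nib.load('example.nii.gz')   # placeholder path
# Previously __array__() took no dtype, so the proxy always produced its
# default (typically float64) scaled array and NumPy cast it afterwards.
# With this patch the requested dtype can be applied during scaling:
data32 = np.asanyarray(img.dataobj, dtype=np.float32)
assert data32.dtype == np.float32
```
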
Markiewicz" Date: Tue, 19 Nov 2019 10:04:44 -0500 Subject: [PATCH 2/7] TEST: Validate dataobj.__array__(dtype) --- nibabel/tests/test_proxy_api.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/nibabel/tests/test_proxy_api.py b/nibabel/tests/test_proxy_api.py index 1694254a0c..b0adc52987 100644 --- a/nibabel/tests/test_proxy_api.py +++ b/nibabel/tests/test_proxy_api.py @@ -132,6 +132,27 @@ def validate_asarray(self, pmaker, params): # Shape matches expected shape assert_equal(out.shape, params['shape']) + def validate_array_interface_with_dtype(self, pmaker, params): + # Check proxy returns expected array from asarray + prox, fio, hdr = pmaker() + orig = np.array(prox, dtype=None) + assert_array_equal(orig, params['arr_out']) + assert_dt_equal(orig.dtype, params['dtype_out']) + + for dtype in np.sctypes['float'] + np.sctypes['int'] + np.sctypes['uint']: + # Directly coerce with a dtype + direct = dtype(prox) + assert_almost_equal(direct, orig.astype(dtype)) + assert_dt_equal(direct.dtype, np.dtype(dtype)) + assert_equal(direct.shape, params['shape']) + # All three methods should produce equivalent results + for arrmethod in (np.array, np.asarray, np.asanyarray): + out = arrmethod(prox, dtype=dtype) + assert_array_equal(out, direct) + assert_dt_equal(out.dtype, np.dtype(dtype)) + # Shape matches expected shape + assert_equal(out.shape, params['shape']) + def validate_get_scaled(self, pmaker, params): # Check proxy returns expected array from asarray prox, fio, hdr = pmaker() From b457534f5a34c7eef91a11b7abdf15deab2b7ce5 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Tue, 26 Nov 2019 10:22:29 -0500 Subject: [PATCH 3/7] RF: Drop (mostly) redundant ArrayProxy.get_scaled() method --- nibabel/arrayproxy.py | 29 ++++++++++---------------- nibabel/dataobj_images.py | 11 +++------- nibabel/ecat.py | 36 +++++++++------------------------ nibabel/minc1.py | 32 ++++++++--------------------- nibabel/parrec.py | 23 ++++++++------------- nibabel/tests/test_proxy_api.py | 24 ++++------------------ 6 files changed, 44 insertions(+), 111 deletions(-) diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index d7d96b24fb..c4189e61e8 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -378,34 +378,27 @@ def get_unscaled(self): """ return self._get_unscaled(slicer=()) - def get_scaled(self, dtype=None): - """ Read data from file and apply scaling + def __array__(self, dtype=None): + """ Read data from file and apply scaling, casting to ``dtype`` - The dtype of the returned array is the narrowest dtype that can - represent the data without overflow, and is at least as wide as - the dtype parameter. + If ``dtype`` is unspecified, the dtype of the returned array is the + narrowest dtype that can represent the data without overflow. + Generally, it is the wider of the dtypes of the slopes or intercepts. - If dtype is unspecified, it is the wider of the dtypes of the slope - or intercept. This will generally be determined by the parameter - size in the image header, and so should be consistent for a given - image format, but may vary across formats. Notably, these factors - are single-precision (32-bit) floats for NIfTI-1 and double-precision - (64-bit) floats for NIfTI-2. + The types of the scale factors will generally be determined by the + parameter size in the image header, and so should be consistent for a + given image format, but may vary across formats. 
From b457534f5a34c7eef91a11b7abdf15deab2b7ce5 Mon Sep 17 00:00:00 2001
From: "Christopher J. Markiewicz"
Date: Tue, 26 Nov 2019 10:22:29 -0500
Subject: [PATCH 3/7] RF: Drop (mostly) redundant ArrayProxy.get_scaled() method

---
 nibabel/arrayproxy.py           | 29 ++++++++++----------------
 nibabel/dataobj_images.py       | 11 +++-------
 nibabel/ecat.py                 | 36 +++++++++------------------------
 nibabel/minc1.py                | 32 ++++++++---------------------
 nibabel/parrec.py               | 23 ++++++++------------
 nibabel/tests/test_proxy_api.py | 24 ++++------------------
 6 files changed, 44 insertions(+), 111 deletions(-)

diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py
index d7d96b24fb..c4189e61e8 100644
--- a/nibabel/arrayproxy.py
+++ b/nibabel/arrayproxy.py
@@ -378,34 +378,27 @@ def get_unscaled(self):
         """
         return self._get_unscaled(slicer=())
 
-    def get_scaled(self, dtype=None):
-        """ Read data from file and apply scaling
+    def __array__(self, dtype=None):
+        """ Read data from file and apply scaling, casting to ``dtype``
 
-        The dtype of the returned array is the narrowest dtype that can
-        represent the data without overflow, and is at least as wide as
-        the dtype parameter.
+        If ``dtype`` is unspecified, the dtype of the returned array is the
+        narrowest dtype that can represent the data without overflow.
+        Generally, it is the wider of the dtypes of the slopes or intercepts.
 
-        If dtype is unspecified, it is the wider of the dtypes of the slope
-        or intercept. This will generally be determined by the parameter
-        size in the image header, and so should be consistent for a given
-        image format, but may vary across formats. Notably, these factors
-        are single-precision (32-bit) floats for NIfTI-1 and double-precision
-        (64-bit) floats for NIfTI-2.
+        The types of the scale factors will generally be determined by the
+        parameter size in the image header, and so should be consistent for a
+        given image format, but may vary across formats.
 
         Parameters
         ----------
-        dtype : numpy dtype specifier
-            A numpy dtype specifier specifying the narrowest acceptable
-            dtype.
+        dtype : numpy dtype specifier, optional
+            A numpy dtype specifier specifying the type of the returned array.
 
         Returns
        -------
         array
-            Scaled of image data of data type `dtype`.
+            Scaled image data with type `dtype`.
         """
-        return self._get_scaled(dtype=dtype, slicer=())
-
-    def __array__(self, dtype=None):
         arr = self._get_scaled(dtype=dtype, slicer=())
         if dtype is not None:
             arr = arr.astype(dtype, copy=False)
         return arr
diff --git a/nibabel/dataobj_images.py b/nibabel/dataobj_images.py
index dd4c853537..4b4d1b55d8 100644
--- a/nibabel/dataobj_images.py
+++ b/nibabel/dataobj_images.py
@@ -10,7 +10,6 @@
 
 import numpy as np
 
-from .arrayproxy import is_proxy
 from .filebasedimages import FileBasedImage
 from .keywordonly import kw_only_meth
 from .deprecated import deprecate_with_version
@@ -351,14 +350,10 @@ def get_fdata(self, caching='fill', dtype=np.float64):
         if self._fdata_cache is not None:
             if self._fdata_cache.dtype.type == dtype.type:
                 return self._fdata_cache
-        dataobj = self._dataobj
-        # Attempt to confine data array to dtype during scaling
-        # On overflow, may still upcast
-        if is_proxy(dataobj):
-            dataobj = dataobj.get_scaled(dtype=dtype)
-        # Always return requested data type
-        # For array proxies, will only copy on overflow
-        data = np.asanyarray(dataobj, dtype=dtype)
+        # Always return requested data type
+        # For array proxies, will attempt to confine data array to dtype
+        # during scaling
+        data = np.asanyarray(self._dataobj, dtype=dtype)
         if caching == 'fill':
             self._fdata_cache = data
         return data
- """ - data = self.__array__() - if dtype is None: - return data - final_type = np.promote_types(data.dtype, dtype) - return data.astype(final_type, copy=False) - def __getitem__(self, sliceobj): """ Return slice `sliceobj` from ECAT data, optimizing if possible """ diff --git a/nibabel/minc1.py b/nibabel/minc1.py index 2c54be841e..6137d11a65 100644 --- a/nibabel/minc1.py +++ b/nibabel/minc1.py @@ -261,45 +261,29 @@ def ndim(self): def is_proxy(self): return True - def _get_scaled(self, dtype, slicer): - data = self.minc_file.get_scaled_data(slicer) - if dtype is None: - return data - final_type = np.promote_types(data.dtype, dtype) - return data.astype(final_type, copy=False) - - def get_scaled(self, dtype=None): - """ Read data from file and apply scaling - - The dtype of the returned array is the narrowest dtype that can - represent the data without overflow, and is at least as wide as - the dtype parameter. + def __array__(self, dtype=None): + """ Read data from file and apply scaling, casting to ``dtype`` - If dtype is unspecified, it is automatically determined. + If ``dtype`` is unspecified, the dtype is automatically determined. Parameters ---------- - dtype : numpy dtype specifier - A numpy dtype specifier specifying the narrowest acceptable - dtype. + dtype : numpy dtype specifier, optional + A numpy dtype specifier specifying the type of the returned array. Returns ------- array - Scaled of image data of data type `dtype`. + Scaled image data with type `dtype`. """ - return self._get_scaled(dtype=dtype, slicer=()) - - def __array__(self, dtype=None): - ''' Read of data from file ''' - arr = self._get_scaled(dtype=dtype, slicer=()) + arr = self.minc_file.get_scaled_data(sliceobj=()) if dtype is not None: arr = arr.astype(dtype, copy=False) return arr def __getitem__(self, sliceobj): """ Read slice `sliceobj` of data from file """ - return self._get_scaled(dtype=None, slicer=sliceobj) + return self.minc_file.get_scaled_data(sliceobj) class MincHeader(SpatialHeader): diff --git a/nibabel/parrec.py b/nibabel/parrec.py index 5e26f67e0a..431e043d12 100644 --- a/nibabel/parrec.py +++ b/nibabel/parrec.py @@ -676,30 +676,23 @@ def get_unscaled(self): """ return self._get_unscaled(slicer=()) - def get_scaled(self, dtype=None): - """ Read data from file and apply scaling - - The dtype of the returned array is the narrowest dtype that can - represent the data without overflow, and is at least as wide as - the dtype parameter. + def __array__(self, dtype=None): + """ Read data from file and apply scaling, casting to ``dtype`` - If dtype is unspecified, it is the wider of the dtypes of the slopes - or intercepts + If ``dtype`` is unspecified, the dtype of the returned array is the + narrowest dtype that can represent the data without overflow. + Generally, it is the wider of the dtypes of the slopes or intercepts. Parameters ---------- - dtype : numpy dtype specifier - A numpy dtype specifier specifying the narrowest acceptable - dtype. + dtype : numpy dtype specifier, optional + A numpy dtype specifier specifying the type of the returned array. Returns ------- array - Scaled of image data of data type `dtype`. + Scaled image data with type `dtype`. 
""" - return self._get_scaled(dtype=dtype, slicer=()) - - def __array__(self, dtype=None): arr = self._get_scaled(dtype=dtype, slicer=()) if dtype is not None: arr = arr.astype(dtype, copy=False) diff --git a/nibabel/tests/test_proxy_api.py b/nibabel/tests/test_proxy_api.py index b0adc52987..585d5e1be6 100644 --- a/nibabel/tests/test_proxy_api.py +++ b/nibabel/tests/test_proxy_api.py @@ -142,7 +142,10 @@ def validate_array_interface_with_dtype(self, pmaker, params): for dtype in np.sctypes['float'] + np.sctypes['int'] + np.sctypes['uint']: # Directly coerce with a dtype direct = dtype(prox) - assert_almost_equal(direct, orig.astype(dtype)) + # Half-precision is imprecise. Obviously. It's a bad idea, but don't break + # the test over it. + rtol = 1e-03 if dtype == np.float16 else 1e-05 + assert_allclose(direct, orig.astype(dtype), rtol=rtol, atol=1e-08) assert_dt_equal(direct.dtype, np.dtype(dtype)) assert_equal(direct.shape, params['shape']) # All three methods should produce equivalent results @@ -153,25 +156,6 @@ def validate_array_interface_with_dtype(self, pmaker, params): # Shape matches expected shape assert_equal(out.shape, params['shape']) - def validate_get_scaled(self, pmaker, params): - # Check proxy returns expected array from asarray - prox, fio, hdr = pmaker() - out = prox.get_scaled() - assert_array_equal(out, params['arr_out']) - assert_dt_equal(out.dtype, params['dtype_out']) - # Shape matches expected shape - assert_equal(out.shape, params['shape']) - - for dtype in np.sctypes['float'] + np.sctypes['int'] + np.sctypes['uint']: - out = prox.get_scaled(dtype=dtype) - # Half-precision is imprecise. Obviously. It's a bad idea, but don't break - # the test over it. - rtol = 1e-03 if dtype == np.float16 else 1e-05 - assert_allclose(out, params['arr_out'].astype(out.dtype), rtol=rtol, atol=1e-08) - assert_greater_equal(out.dtype, np.dtype(dtype)) - # Shape matches expected shape - assert_equal(out.shape, params['shape']) - def validate_header_isolated(self, pmaker, params): # Confirm altering input header has no effect # Depends on header providing 'get_data_dtype', 'set_data_dtype', From 8630b26ca393a2ab0f1b6b7717910489e93af45c Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Wed, 27 Nov 2019 15:31:40 -0500 Subject: [PATCH 4/7] DOC: Update changelog --- Changelog | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Changelog b/Changelog index f9f4f00df5..165f1db94b 100644 --- a/Changelog +++ b/Changelog @@ -30,10 +30,13 @@ References like "pr/298" refer to github pull request numbers. New features ------------ -* ArrayProxy method ``get_scaled()`` scales data with a dtype of a - specified precision, promoting as necessary to avoid overflow. This - is to used in ``img.get_fdata()`` to control memory usage. (pr/833) - (CM, reviewed by Ross Markello) +* ArrayProxy ``__array__()`` now accepts a ``dtype`` parameter, allowing + ``numpy.array(dataobj, dtype=...)`` calls, as well as casting directly + with a dtype (for example, ``numpy.float32(dataobj)``) to control the + output type. Scale factors (slope, intercept) are applied, but may be + cast to narrower types, to control memory usage. This is now the basis + of ``img.get_fdata()``, which will scale data in single precision if + the output type is ``float32``. (pr/844) (CM, reviewed by ...) 
From 8630b26ca393a2ab0f1b6b7717910489e93af45c Mon Sep 17 00:00:00 2001
From: "Christopher J. Markiewicz"
Date: Wed, 27 Nov 2019 15:31:40 -0500
Subject: [PATCH 4/7] DOC: Update changelog

---
 Changelog | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/Changelog b/Changelog
index f9f4f00df5..165f1db94b 100644
--- a/Changelog
+++ b/Changelog
@@ -30,10 +30,13 @@ References like "pr/298" refer to github pull request numbers.
 
 New features
 ------------
-* ArrayProxy method ``get_scaled()`` scales data with a dtype of a
-  specified precision, promoting as necessary to avoid overflow. This
-  is to used in ``img.get_fdata()`` to control memory usage. (pr/833)
-  (CM, reviewed by Ross Markello)
+* ArrayProxy ``__array__()`` now accepts a ``dtype`` parameter, allowing
+  ``numpy.array(dataobj, dtype=...)`` calls, as well as casting directly
+  with a dtype (for example, ``numpy.float32(dataobj)``) to control the
+  output type. Scale factors (slope, intercept) are applied, but may be
+  cast to narrower types, to control memory usage. This is now the basis
+  of ``img.get_fdata()``, which will scale data in single precision if
+  the output type is ``float32``. (pr/844) (CM, reviewed by ...)
 * GiftiImage method ``agg_data()`` to return usable data arrays (pr/793)
   (Hao-Ting Wang, reviewed by CM)
 * Accept ``os.PathLike`` objects in place of filenames (pr/610) (Cameron

From 5e5ec22858eb111590a05295e16529f038cf0a34 Mon Sep 17 00:00:00 2001
From: "Christopher J. Markiewicz"
Date: Mon, 2 Dec 2019 11:33:01 -0500
Subject: [PATCH 5/7] TEST: Filter complex warnings

---
 nibabel/tests/test_proxy_api.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/nibabel/tests/test_proxy_api.py b/nibabel/tests/test_proxy_api.py
index 585d5e1be6..48a024795d 100644
--- a/nibabel/tests/test_proxy_api.py
+++ b/nibabel/tests/test_proxy_api.py
@@ -57,7 +57,7 @@
 
 from numpy.testing import assert_almost_equal, assert_array_equal, assert_allclose
 
-from ..testing import data_path as DATA_PATH, assert_dt_equal
+from ..testing import data_path as DATA_PATH, assert_dt_equal, clear_and_catch_warnings
 
 from ..tmpdirs import InTemporaryDirectory
 
@@ -139,6 +139,12 @@ def validate_array_interface_with_dtype(self, pmaker, params):
         assert_array_equal(orig, params['arr_out'])
         assert_dt_equal(orig.dtype, params['dtype_out'])
 
+        context = None
+        if np.issubdtype(orig.dtype, np.complexfloating):
+            context = clear_and_catch_warnings()
+            context.__enter__()
+            warnings.simplefilter('ignore', np.ComplexWarning)
+
         for dtype in np.sctypes['float'] + np.sctypes['int'] + np.sctypes['uint']:
             # Directly coerce with a dtype
             direct = dtype(prox)
@@ -156,6 +162,9 @@ def validate_array_interface_with_dtype(self, pmaker, params):
             # Shape matches expected shape
             assert_equal(out.shape, params['shape'])
 
+        if context is not None:
+            context.__exit__()
+
     def validate_header_isolated(self, pmaker, params):
         # Confirm altering input header has no effect
         # Depends on header providing 'get_data_dtype', 'set_data_dtype',
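
The warning filter above matters only for the complex-valued proxies in the test matrix: casting complex data to a real dtype makes NumPy emit a ``ComplexWarning``, which would otherwise clutter the test run (or fail it when warnings are escalated to errors). A standalone illustration in plain NumPy, independent of nibabel:

```python
import warnings
import numpy as np

carr = np.array([1 + 2j, 3 - 4j])
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    real = carr.astype(np.float64)   # discards the imaginary part
print(real)                          # [1. 3.]
print(caught[0].category.__name__)   # ComplexWarning
```
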
From 70987fb327bd823750bef2c8ff73f7789eebf56d Mon Sep 17 00:00:00 2001
From: "Christopher J. Markiewicz"
Date: Mon, 2 Dec 2019 14:33:10 -0500
Subject: [PATCH 6/7] TEST: Improve test naming for tracking down failures

---
 nibabel/tests/test_api_validators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nibabel/tests/test_api_validators.py b/nibabel/tests/test_api_validators.py
index a7cbb8b555..41d3f41110 100644
--- a/nibabel/tests/test_api_validators.py
+++ b/nibabel/tests/test_api_validators.py
@@ -19,7 +19,7 @@ def meth(self):
             for imaker, params in self.obj_params():
                 validator(self, imaker, params)
         meth.__name__ = 'test_' + name[len('validate_'):]
-        meth.__doc__ = 'autogenerated test from ' + name
+        meth.__doc__ = 'autogenerated test from {}.{}'.format(klass.__name__, name)
         return meth
     for name in dir(klass):
         if not name.startswith('validate_'):

From 3351d16bbb930350378f395dbd22cde329b90e8c Mon Sep 17 00:00:00 2001
From: "Christopher J. Markiewicz"
Date: Thu, 5 Dec 2019 21:50:07 -0500
Subject: [PATCH 7/7] FIX: ECAT data must be coerced after reading

---
 nibabel/ecat.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/nibabel/ecat.py b/nibabel/ecat.py
index b15d6906ea..1814b9147c 100644
--- a/nibabel/ecat.py
+++ b/nibabel/ecat.py
@@ -708,11 +708,13 @@ def __array__(self, dtype=None):
             Scaled image data with type `dtype`.
         '''
         # dtype=None is interpreted as float64
-        data = np.empty(self.shape, dtype=dtype)
+        data = np.empty(self.shape)
         frame_mapping = get_frame_order(self._subheader._mlist)
         for i in sorted(frame_mapping):
             data[:, :, :, i] = self._subheader.data_from_fileobj(
                 frame_mapping[i][0])
+        if dtype is not None:
+            data = data.astype(dtype, copy=False)
         return data
 
     def __getitem__(self, sliceobj):
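
After this last patch, the ECAT proxy assembles frames into a float64 working buffer and applies the requested dtype once at the end, rather than allocating the output buffer with the requested dtype up front. A condensed paraphrase of that pattern (not a verbatim copy of the nibabel method):

```python
import numpy as np

def assemble_frames(frames, dtype=None):
    """Stack per-frame 3D arrays into a 4D volume, casting once at the end."""
    data = np.empty(frames[0].shape + (len(frames),))   # float64 working buffer
    for i, frame in enumerate(frames):
        data[..., i] = frame
    if dtype is not None:
        data = data.astype(dtype, copy=False)
    return data

frames = [np.random.rand(2, 3, 4) * 100 for _ in range(5)]
vol = assemble_frames(frames, dtype=np.float32)
print(vol.shape, vol.dtype)   # (2, 3, 4, 5) float32
```
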