Skip to content

Commit 42501a5

Browse files
matthew-brett authored and effigies committed
RF+TST: refactor dataobj attribute into own class
Add interface to dataobj to an abstract class for easier inheritance by others (particularly CIFTI2). Make SpatialImage inherit from this class. Add tests for filebasedimages and dataobj_images.
1 parent 97405ad commit 42501a5

File tree

5 files changed

+366
-212
lines changed

5 files changed

+366
-212
lines changed

nibabel/dataobj_images.py

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
""" File-based images that have data arrays
2+
3+
The :class:`DataobjImage` class defines an image that extends the
4+
:class:`FileBasedImage` by adding an array-like object, named ``dataobj``.
5+
This can either be an actual numpy array, or an object that:
6+
7+
* returns an array from ``numpy.asanyarray(obj)``;
8+
* has an attribute or property ``shape``.
9+
"""
10+
11+
import numpy as np
12+
13+
from .filebasedimages import FileBasedImage
14+
from .deprecated import deprecate_with_version
15+
16+
17+
class DataobjImage(FileBasedImage):
    ''' Base class for images that keep their data in a ``dataobj`` store

    The ``dataobj`` is either a numpy array (an *array image*) or an array
    proxy that knows how to load the data from disk (a *proxy image*).
    '''

    def __init__(self, dataobj, header=None, extra=None, file_map=None):
        ''' Create a dataobj image from a data object and optional metadata

        A dataobj image pairs ``(dataobj, header)`` with optional metadata in
        `extra` and with the filenames / file-like objects held in the
        `file_map` mapping.

        Parameters
        ----------
        dataobj : object
            Object containing the image data.  ``np.asanyarray(dataobj)``
            should return an array, and the object should have a ``shape``
            attribute or property.
        header : None or mapping or header instance, optional
            metadata for this image format
        extra : None or mapping, optional
            metadata to associate with the image that cannot be stored in
            the metadata of this image type
        file_map : mapping, optional
            mapping giving file information for this image format
        '''
        # header, extra and file_map are handed to the parent initializer
        super(DataobjImage, self).__init__(
            header=header,
            extra=extra,
            file_map=file_map)
        # The cache starts empty; ``get_data`` may fill it later
        self._data_cache = None
        self._dataobj = dataobj

    @property
    def dataobj(self):
        ''' The data object (array or array proxy) passed at construction
        '''
        return self._dataobj

    @property
    @deprecate_with_version('_data attribute not part of public API. '
                            'please use "dataobj" property instead.',
                            '2.0', '4.0')
    def _data(self):
        # Deprecated alias; returns the same object as ``dataobj``
        return self._dataobj

    def get_data(self, caching='fill'):
        """ Return the image data as an array, optionally caching the result

        ``dataobj`` can be an array or an array proxy.  An array proxy is an
        object that knows how to load the image data from disk.  We call an
        image whose ``dataobj`` is a proxy a *proxy image*, and an image whose
        ``dataobj`` is an array an *array image*.

        With the default `caching` == "fill", calling ``get_data()`` on a
        proxy image reads the data from the proxy and keeps a reference to the
        resulting array in an internal cache.  Later calls to ``get_data``
        return that cached array.

        Because the cached array and the returned array are the same object,
        modifying the array you got back also modifies what later calls
        return.  For an array image this is always the case, whatever the
        value of `caching`.

        Parameters
        ----------
        caching : {'fill', 'unchanged'}, optional
            Whether the image should store an internal cached reference to
            the returned array; see Notes for the full story.  "fill" stores
            the reference if the cache is currently empty, so that future
            calls to ``get_data`` return the cached array without reloading
            from disk.  "unchanged" leaves an empty cache empty, so that
            ``get_data`` does not create an extra (cached) reference to the
            returned array; this makes it easier for Python to free the
            memory of the returned array.

        Returns
        -------
        data : array
            array of image data

        Raises
        ------
        ValueError
            if `caching` is neither "fill" nor "unchanged"

        See also
        --------
        uncache: empty the array data cache

        Notes
        -----
        Every image has a ``dataobj`` property representing the image array
        data.  Images loaded from files usually do not read the array data
        immediately, to keep load time and memory use down.  For such images
        ``dataobj`` is an *array proxy*; an object that knows how to load the
        image array data from file.

        By default (`caching` == "fill"), ``get_data`` on a proxy image loads
        the array data from disk, stores (caches) an internal reference to the
        array, and returns the array.  The next call to ``get_data`` returns
        the cached reference, so the data is not loaded from disk again.

        For an array image ``dataobj`` already refers to an array in memory,
        so caching brings no benefit and the `caching` keyword has no effect.

        For proxy images you may prefer not to fill the cache, because the
        cache holds on to the array memory until the image object is deleted
        or you call the image ``uncache`` method.  In that case always use
        ``get_data(caching='unchanged')``: an empty cache then stays empty.
        If the cache is already full, "unchanged" leaves it full and returns
        the cached array reference.

        The cache can affect the behavior of the image: when the cache is
        full, or the image is an array image, modifying the returned array
        modifies the result of future calls to ``get_data()``.  For example
        you might do this:

        >>> import os
        >>> import nibabel as nib
        >>> from nibabel.testing import data_path
        >>> img_fname = os.path.join(data_path, 'example4d.nii.gz')

        >>> img = nib.load(img_fname) # This is a proxy image
        >>> nib.is_proxy(img.dataobj)
        True

        The array is not yet cached by a call to "get_data", so:

        >>> img.in_memory
        False

        After we call ``get_data`` using the default `caching` == 'fill', the
        cache contains a reference to the returned array ``data``:

        >>> data = img.get_data()
        >>> img.in_memory
        True

        We modify an element in the returned data array:

        >>> data[0, 0, 0, 0]
        0
        >>> data[0, 0, 0, 0] = 99
        >>> data[0, 0, 0, 0]
        99

        The next time we call 'get_data', the method returns the cached
        reference to the (modified) array:

        >>> data_again = img.get_data()
        >>> data_again is data
        True
        >>> data_again[0, 0, 0, 0]
        99

        If you had *initially* used `caching` == 'unchanged' then the returned
        ``data`` array would have been loaded from file, but not cached, and:

        >>> img = nib.load(img_fname)  # a proxy image again
        >>> data = img.get_data(caching='unchanged')
        >>> img.in_memory
        False
        >>> data[0, 0, 0] = 99
        >>> data_again = img.get_data(caching='unchanged')
        >>> data_again is data
        False
        >>> data_again[0, 0, 0, 0]
        0
        """
        if caching not in ('fill', 'unchanged'):
            raise ValueError('caching value should be "fill" or "unchanged"')
        cached = self._data_cache
        if cached is None:
            # Cache is empty: build the array from the data object
            loaded = np.asanyarray(self._dataobj)
            if caching == 'fill':
                self._data_cache = loaded
            return loaded
        # A filled cache always wins, whatever the `caching` value
        return cached

    @property
    def in_memory(self):
        """ True when the array data is held in memory

        That is the case when the cache has been filled, or when ``dataobj``
        is itself a numpy array.
        """
        if self._data_cache is not None:
            return True
        return isinstance(self._dataobj, np.ndarray)

    def uncache(self):
        """ Drop any cached array read from proxied data

        There are two types of images:

        * *array images* where the data ``img.dataobj`` is an array
        * *proxy images* where the data ``img.dataobj`` is a proxy object

        Calling ``img.get_data()`` on a proxy image caches the array read from
        the proxy inside the image object, and the next call to
        ``img.get_data()`` returns that cache.  If you modify the returned
        data, as in::

            data = img.get_data()
            data[:] = 42

        then the next call to ``img.get_data()`` returns the modified array,
        whether the image is an array image or a proxy image::

            assert np.all(img.get_data() == 42)

        Uncaching an array image has no effect on what ``img.get_data()``
        returns, but after uncaching a proxy image, ``img.get_data()`` goes
        back to returning the original values.
        """
        self._data_cache = None

    @property
    def shape(self):
        ''' Shape of the image data, as reported by ``dataobj.shape``
        '''
        return self._dataobj.shape

    @deprecate_with_version('get_shape method is deprecated.\n'
                            'Please use the ``img.shape`` property '
                            'instead.',
                            '1.2', '3.0')
    def get_shape(self):
        """ Return shape for image
        """
        return self.shape

nibabel/filebasedimages.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
99
''' Common interface for any image format--volume or surface, binary or xml.'''
1010

11+
from copy import deepcopy
12+
1113
from .externals.six import string_types
1214
from .fileholders import FileHolder
1315
from .filename_parser import (types_filenames, TypesFilenamesError,
@@ -55,7 +57,7 @@ def copy(self):
5557
The copy should not be affected by any changes to the original
5658
object.
5759
'''
58-
raise NotImplementedError
60+
return deepcopy(self)
5961

6062

6163
class FileBasedImage(object):
@@ -189,11 +191,7 @@ def __init__(self, header=None, extra=None, file_map=None):
189191
file_map : mapping, optional
190192
mapping giving file information for this image format
191193
'''
192-
193-
if header or self.header_class:
194-
self._header = self.header_class.from_header(header)
195-
else:
196-
self._header = None
194+
self._header = self.header_class.from_header(header)
197195
if extra is None:
198196
extra = {}
199197
self.extra = extra
@@ -231,10 +229,9 @@ def get_filename(self):
231229
-------
232230
fname : None or str
233231
Returns None if there is no filename, or a filename string.
234-
If an image may have several filenames assoctiated with it
235-
(e.g Analyze ``.img, .hdr`` pair) then we return the more
236-
characteristic filename (the ``.img`` filename in the case of
237-
Analyze')
232+
If an image may have several filenames associated with it (e.g.
233+
Analyze ``.img, .hdr`` pair) then we return the more characteristic
234+
filename (the ``.img`` filename in the case of Analyze)
238235
'''
239236
# which filename is returned depends on the ordering of the
240237
# 'files_types' class attribute - we return the name

0 commit comments

Comments
 (0)