Skip to content

Commit 39399fa

Browse files
committed
ENH: Scaffolding to support mem-mapping of external data files for GIFTI images
1 parent 8e3e766 commit 39399fa

File tree

2 files changed

+42
-8
lines changed

2 files changed

+42
-8
lines changed

nibabel/gifti/gifti.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -881,27 +881,41 @@ def to_file_map(self, file_map=None):
881881
f.write(self.to_xml())
882882

883883
@classmethod
884-
def from_file_map(klass, file_map, buffer_size=35000000):
885-
""" Load a Gifti image from a file_map
884+
def from_file_map(klass, file_map, buffer_size=35000000, mmap=True):
885+
"""Load a Gifti image from a file_map
886886
887887
Parameters
888888
----------
889889
file_map : dict
890890
Dictionary with single key ``image`` with associated value which is
891891
a :class:`FileHolder` instance pointing to the image file.
892892
893+
buffer_size : None or int, optional
894+
Size of the read buffer. ``None`` uses the default ``buffer_size``
895+
from xml.parsers.expat.
896+
897+
mmap : {True, False, 'c', 'r', 'r+'}, optional
898+
Controls the use of numpy memory mapping for reading data. Only
899+
has an effect when loading GIFTI images with data stored in
900+
external files (``DataArray`` elements with an ``Encoding`` equal
901+
to ``ExternalFileBinary``). If ``False``, do not try numpy
902+
``memmap`` for data array. If one of ``{'c', 'r', 'r+'}``, try
903+
numpy ``memmap`` with ``mode=mmap``. A `mmap` value of ``True``
904+
gives the same behavior as ``mmap='c'``. If the file cannot be
905+
memory-mapped, ignore `mmap` value and read array from file.
906+
893907
Returns
894908
-------
895909
img : GiftiImage
896910
"""
897-
parser = klass.parser(buffer_size=buffer_size)
911+
parser = klass.parser(buffer_size=buffer_size, mmap=mmap)
898912
parser.parse(fptr=file_map['image'].get_prepare_fileobj('rb'))
899913
return parser.img
900914

901915
@classmethod
902-
def from_filename(klass, filename, buffer_size=35000000):
916+
def from_filename(klass, filename, buffer_size=35000000, mmap=True):
903917
file_map = klass.filespec_to_file_map(filename)
904-
img = klass.from_file_map(file_map, buffer_size=buffer_size)
918+
img = klass.from_file_map(file_map, buffer_size=buffer_size, mmap=mmap)
905919
return img
906920

907921

nibabel/gifti/parse_gifti_fast.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class GiftiParseError(ExpatError):
3131
""" Gifti-specific parsing error """
3232

3333

34-
def read_data_block(darray, fname, data):
34+
def read_data_block(darray, fname, data, mmap):
3535
"""Parses data from a <Data> element, or loads from an external file.
3636
3737
Parameters
@@ -46,10 +46,25 @@ def read_data_block(darray, fname, data):
4646
data : str or None
4747
Data to parse, or None if data is in an external file
4848
49+
mmap : {True, False, 'c', 'r', 'r+'}
50+
Controls the use of numpy memory mapping for reading data. Only has
51+
an effect when loading GIFTI images with data stored in external files
52+
(``DataArray`` elements with an ``Encoding`` equal to
53+
``ExternalFileBinary``). If ``False``, do not try numpy ``memmap``
54+
for data array. If one of ``{'c', 'r', 'r+'}``, try numpy ``memmap``
55+
with ``mode=mmap``. A `mmap` value of ``True`` gives the same
56+
behavior as ``mmap='c'``. If the file cannot be memory-mapped, ignore
57+
`mmap` value and read array from file.
58+
4959
Returns
5060
-------
5161
numpy.ndarray containing the parsed data
5262
"""
63+
if mmap not in (True, False, 'c', 'r', 'r+'):
64+
raise ValueError("mmap value should be one of True, False, 'c', "
65+
"'r', 'r+'")
66+
if mmap is True:
67+
mmap = 'c'
5368
enclabel = gifti_encoding_codes.label[darray.encoding]
5469
dtype = data_type_codes.type[darray.datatype]
5570

@@ -114,13 +129,17 @@ def _str2int(in_str):
114129

115130
class GiftiImageParser(XmlParser):
116131

117-
def __init__(self, encoding=None, buffer_size=35000000, verbose=0):
132+
def __init__(self, encoding=None, buffer_size=35000000, verbose=0,
133+
mmap=True):
118134
super(GiftiImageParser, self).__init__(encoding=encoding,
119135
buffer_size=buffer_size,
120136
verbose=verbose)
121137
# output
122138
self.img = None
123139

140+
# Queried when loading data from <Data> elements - see read_data_block
141+
self.mmap = mmap
142+
124143
# finite state machine stack
125144
self.fsm_state = []
126145

@@ -358,7 +377,8 @@ def flush_chardata(self):
358377
c.close()
359378

360379
elif self.write_to == 'Data':
361-
self.da.data = read_data_block(self.da, self.fname, data)
380+
self.da.data = read_data_block(self.da, self.fname, data,
381+
self.mmap)
362382
# update the endianness according to the
363383
# current machine setting
364384
self.endian = gifti_endian_codes.code[sys.byteorder]

0 commit comments

Comments
 (0)