Skip to content

Commit 5cbc26a

Browse files
committed
ENH: Load data from external file when GIFTI DataArray specifies
ExternalFileName
1 parent 95e1fbe commit 5cbc26a

File tree

1 file changed

+35
-17
lines changed

1 file changed

+35
-17
lines changed

nibabel/gifti/parse_gifti_fast.py

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -30,37 +30,50 @@ class GiftiParseError(ExpatError):
3030
""" Gifti-specific parsing error """
3131

3232

33-
def read_data_block(encoding, endian, ordering, datatype, shape, data):
33+
def read_data_block(encoding,
34+
endian,
35+
ordering,
36+
datatype,
37+
shape,
38+
data,
39+
darray):
3440
""" Tries to unzip, decode, parse the funny string data """
3541
enclabel = gifti_encoding_codes.label[encoding]
3642
dtype = data_type_codes.type[datatype]
43+
3744
if enclabel == 'ASCII':
3845
# GIFTI_ENCODING_ASCII
3946
c = StringIO(data)
4047
da = np.loadtxt(c, dtype=dtype)
4148
return da # independent of the endianness
42-
43-
elif enclabel == 'External':
44-
# GIFTI_ENCODING_EXTBIN
45-
raise NotImplementedError("In what format are the external files?")
46-
47-
elif enclabel not in ('B64BIN', 'B64GZ'):
49+
elif enclabel not in ('B64BIN', 'B64GZ', 'External'):
4850
return 0
4951

52+
# GIFTI_ENCODING_EXTBIN
53+
# We assume that the external data file is raw uncompressed binary, with
54+
# the data type/endianness/ordering specified by the other DataArray
55+
# attributes
56+
if enclabel == 'External':
57+
with open(darray.ext_fname, 'rb') as f:
58+
f.seek(darray.ext_offset)
59+
nbytes = np.prod(shape) * dtype().itemsize
60+
buff = f.read(nbytes)
61+
5062
# Numpy arrays created from bytes objects are read-only.
5163
# Neither b64decode nor decompress will return bytearrays, and there
5264
# are not equivalents to fobj.readinto to allow us to pass them, so
5365
# there is not a simple way to avoid making copies.
5466
# If this becomes a problem, we should write a decoding interface with
5567
# a tunable chunk size.
56-
dec = base64.b64decode(data.encode('ascii'))
57-
if enclabel == 'B64BIN':
58-
# GIFTI_ENCODING_B64BIN
59-
buff = bytearray(dec)
6068
else:
61-
# GIFTI_ENCODING_B64GZ
62-
buff = bytearray(zlib.decompress(dec))
63-
del dec
69+
dec = base64.b64decode(data.encode('ascii'))
70+
if enclabel == 'B64BIN':
71+
# GIFTI_ENCODING_B64BIN
72+
buff = bytearray(dec)
73+
else:
74+
# GIFTI_ENCODING_B64GZ
75+
buff = bytearray(zlib.decompress(dec))
76+
del dec
6477

6578
sh = tuple(shape)
6679
newarr = np.frombuffer(buff, dtype=dtype)
@@ -288,12 +301,17 @@ def CharacterDataHandler(self, data):
288301

289302
def flush_chardata(self):
290303
""" Collate and process collected character data"""
291-
if self._char_blocks is None:
304+
# Nothing to do for empty elements, except for Data elements which
305+
# are within a DataArray with an external file
306+
if self.write_to != 'Data' and self._char_blocks is None:
292307
return
293308
# Just join the strings to get the data. Maybe there are some memory
294309
# optimizations we could do by passing the list of strings to the
295310
# read_data_block function.
296-
data = ''.join(self._char_blocks)
311+
if self._char_blocks is not None:
312+
data = ''.join(self._char_blocks)
313+
else:
314+
data = None
297315
# Reset the char collector
298316
self._char_blocks = None
299317

@@ -324,7 +342,7 @@ def flush_chardata(self):
324342
da_tmp = self.img.darrays[-1]
325343
da_tmp.data = read_data_block(da_tmp.encoding, da_tmp.endian,
326344
da_tmp.ind_ord, da_tmp.datatype,
327-
da_tmp.dims, data)
345+
da_tmp.dims, data, self.da)
328346
# update the endianness according to the
329347
# current machine setting
330348
self.endian = gifti_endian_codes.code[sys.byteorder]

0 commit comments

Comments
 (0)