Skip to content

Commit 41b82e4

Browse files
committed
RF - collect chunks of character data, so allowing py25 buffer_sizes in parser
1 parent 387a049 commit 41b82e4

File tree

3 files changed

+61
-16
lines changed

3 files changed

+61
-16
lines changed

nibabel/gifti/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,6 @@
1818
"""
1919

2020
from .giftiio import read, write
21-
from .gifti import *
21+
from .gifti import (GiftiMetaData, GiftiNVPairs, GiftiLabelTable, GiftiLabel,
22+
GiftiCoordSystem, data_tag, GiftiDataArray, GiftiImage)
23+

nibabel/gifti/parse_gifti_fast.py

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,11 @@ def initialize(self):
9393
self.write_to = None
9494
self.img = None
9595

96+
# Collecting char buffer fragments
97+
self._char_blocks = None
98+
9699
def StartElementHandler(self, name, attrs):
100+
self.flush_chardata()
97101
if DEBUG_PRINT:
98102
print 'Start element:\n\t', repr(name), attrs
99103
if name == 'GIFTI':
@@ -195,6 +199,7 @@ def StartElementHandler(self, name, attrs):
195199
self.write_to = 'Data'
196200

197201
def EndElementHandler(self, name):
202+
self.flush_chardata()
198203
if DEBUG_PRINT:
199204
print 'End element:\n\t', repr(name)
200205
if name == 'GIFTI':
@@ -249,6 +254,30 @@ def EndElementHandler(self, name):
249254
self.write_to = None
250255

251256
def CharacterDataHandler(self, data):
    """ Collect character data chunks pending collation

    Expat delivers large runs of character data in pieces no bigger than
    the parser's buffer_size (commonly 8K), so a single block of text can
    arrive over many calls to this handler.  Each chunk is stashed here
    and collated later, when the next start or end tag is seen (see
    ``flush_chardata``).
    """
    blocks = self._char_blocks
    if blocks is None:
        # First chunk since the last flush - start a fresh collector
        blocks = self._char_blocks = []
    blocks.append(data)
268+
269+
def flush_chardata(self):
270+
""" Collate and process collected character data
271+
"""
272+
if self._char_blocks is None:
273+
return
274+
# Just join the strings to get the data. Maybe there are some memory
275+
# optimizations we could do by passing the list of strings to the
276+
# read_data_block function.
277+
data = ''.join(self._char_blocks)
278+
# Reset the char collector
279+
self._char_blocks = None
280+
# Process data
252281
if self.write_to == 'Name':
253282
data = data.strip()
254283
self.nvpair.name = data
@@ -277,25 +306,40 @@ def CharacterDataHandler(self, data):
277306
elif self.write_to == 'Label':
278307
self.label.label = data.strip()
279308

309+
@property
def pending_data(self):
    """ True if there is character data pending for processing

    Returns
    -------
    bool
        True when ``CharacterDataHandler`` has collected chunks that
        ``flush_chardata`` has not yet processed, False otherwise.
    """
    # PEP 8: use 'is not None' rather than 'not ... is None'
    return self._char_blocks is not None
313+
280314

281-
def parse_gifti_file(fname, buffer_size = 35000000):
315+
def parse_gifti_file(fname, buffer_size = None):
282316
""" Parse gifti file named `fname`, return image
283317
284318
Parameters
285319
----------
286320
fname : str
287321
filename of gifti file
288-
buffer_size: int, optional
289-
size of read buffer.
322+
buffer_size: None or int, optional
323+
size of read buffer. None gives default of 35000000 unless on python <
324+
2.6, in which case it is read only in the parser. In that case values
325+
other than None cause a ValueError on execution
290326
291327
Returns
292328
-------
293329
img : gifti image
294330
"""
331+
if buffer_size is None:
332+
buffer_sz_val = 35000000
333+
else:
334+
buffer_sz_val = buffer_size
295335
datasource = open(fname,'rb')
296336
parser = ParserCreate()
297337
parser.buffer_text = True
298-
parser.buffer_size = buffer_size
338+
try:
339+
parser.buffer_size = buffer_sz_val
340+
except AttributeError:
341+
if not buffer_size is None:
342+
raise ValueError('Cannot set buffer size for parser')
299343
HANDLER_NAMES = ['StartElementHandler',
300344
'EndElementHandler',
301345
'CharacterDataHandler']
@@ -306,6 +350,8 @@ def parse_gifti_file(fname, buffer_size = 35000000):
306350
parser.ParseFile(datasource)
307351
except ExpatError:
308352
print 'An expat error occured while parsing the Gifti file.'
353+
# Reality check for pending data
354+
assert out.pending_data is False
309355
# update filename
310356
out.img.filename = fname
311357
return out.img

nibabel/gifti/tests/test_giftiio.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,15 @@
1212

1313
import numpy as np
1414

15-
from numpy.testing import assert_array_equal, assert_array_almost_equal
15+
from ... import gifti as gi
16+
from ...nifti1 import xform_codes
1617

17-
from nose.tools import assert_true, assert_false, \
18-
assert_equal, assert_raises
18+
from ...tmpdirs import InTemporaryDirectory
1919

20-
try:
21-
import nibabel.gifti as gi
22-
except ImportError:
23-
from nose import SkipTest
24-
raise SkipTest
20+
from numpy.testing import assert_array_equal, assert_array_almost_equal
2521

26-
from ...tmpdirs import InTemporaryDirectory
22+
from nose.tools import (assert_true, assert_false, assert_equal,
23+
assert_raises)
2724

2825

2926
IO_DATA_PATH = pjoin(dirname(__file__), 'data')
@@ -129,8 +126,8 @@ def test_dataarray1():
129126
assert_equal(me['AnatomicalStructurePrimary'], 'CortexLeft')
130127

131128
assert_array_almost_equal(img.darrays[0].coordsys.xform, np.eye(4,4))
132-
assert_equal(gi.xform_codes.niistring[img.darrays[0].coordsys.dataspace],'NIFTI_XFORM_TALAIRACH')
133-
assert_equal(gi.xform_codes.niistring[img.darrays[0].coordsys.xformspace],'NIFTI_XFORM_TALAIRACH')
129+
assert_equal(xform_codes.niistring[img.darrays[0].coordsys.dataspace],'NIFTI_XFORM_TALAIRACH')
130+
assert_equal(xform_codes.niistring[img.darrays[0].coordsys.xformspace],'NIFTI_XFORM_TALAIRACH')
134131

135132
def test_dataarray2():
136133
img2 = gi.read(DATA_FILE2)

0 commit comments

Comments
 (0)