@@ -93,7 +93,11 @@ def initialize(self):
93
93
self .write_to = None
94
94
self .img = None
95
95
96
+ # Collecting char buffer fragments
97
+ self ._char_blocks = None
98
+
96
99
def StartElementHandler (self , name , attrs ):
100
+ self .flush_chardata ()
97
101
if DEBUG_PRINT :
98
102
print 'Start element:\n \t ' , repr (name ), attrs
99
103
if name == 'GIFTI' :
@@ -195,6 +199,7 @@ def StartElementHandler(self, name, attrs):
195
199
self .write_to = 'Data'
196
200
197
201
def EndElementHandler (self , name ):
202
+ self .flush_chardata ()
198
203
if DEBUG_PRINT :
199
204
print 'End element:\n \t ' , repr (name )
200
205
if name == 'GIFTI' :
@@ -249,6 +254,30 @@ def EndElementHandler(self, name):
249
254
self .write_to = None
250
255
251
256
def CharacterDataHandler(self, data):
    """ Buffer one chunk of character data for later collation

    The parser may deliver a single run of character data across many
    calls to this handler, with chunk size depending on the parser's
    buffer_size. Rather than processing each chunk as it arrives, the
    chunks are appended to ``self._char_blocks``; they are joined and
    processed when the next start or end tag triggers a flush.

    Parameters
    ----------
    data : str
        chunk of character data passed in by the parser
    """
    blocks = self._char_blocks
    if blocks is None:
        # First chunk since the last flush: start a fresh collector
        blocks = self._char_blocks = []
    blocks.append(data)
268
+
269
+ def flush_chardata (self ):
270
+ """ Collate and process collected character data
271
+ """
272
+ if self ._char_blocks is None :
273
+ return
274
+ # Just join the strings to get the data. Maybe there are some memory
275
+ # optimizations we could do by passing the list of strings to the
276
+ # read_data_block function.
277
+ data = '' .join (self ._char_blocks )
278
+ # Reset the char collector
279
+ self ._char_blocks = None
280
+ # Process data
252
281
if self .write_to == 'Name' :
253
282
data = data .strip ()
254
283
self .nvpair .name = data
@@ -277,25 +306,40 @@ def CharacterDataHandler(self, data):
277
306
elif self .write_to == 'Label' :
278
307
self .label .label = data .strip ()
279
308
309
@property
def pending_data(self):
    """ True if there is character data pending for processing

    Data is pending whenever ``self._char_blocks`` is not None, that is,
    when a collector exists that has not yet been reset.
    """
    # `is not None` is the idiomatic spelling of `not ... is None`
    return self._char_blocks is not None
313
+
280
314
281
- def parse_gifti_file (fname , buffer_size = 35000000 ):
315
+ def parse_gifti_file (fname , buffer_size = None ):
282
316
""" Parse gifti file named `fname`, return image
283
317
284
318
Parameters
285
319
----------
286
320
fname : str
287
321
filename of gifti file
288
- buffer_size: int, optional
289
- size of read buffer.
322
+ buffer_size: None or int, optional
323
+ size of read buffer. None gives default of 35000000 unless on python <
324
+ 2.6, in which case it is read only in the parser. In that case values
325
+ other than None cause a ValueError on execution
290
326
291
327
Returns
292
328
-------
293
329
img : gifti image
294
330
"""
331
+ if buffer_size is None :
332
+ buffer_sz_val = 35000000
333
+ else :
334
+ buffer_sz_val = buffer_size
295
335
datasource = open (fname ,'rb' )
296
336
parser = ParserCreate ()
297
337
parser .buffer_text = True
298
- parser .buffer_size = buffer_size
338
+ try :
339
+ parser .buffer_size = buffer_sz_val
340
+ except AttributeError :
341
+ if not buffer_size is None :
342
+ raise ValueError ('Cannot set buffer size for parser' )
299
343
HANDLER_NAMES = ['StartElementHandler' ,
300
344
'EndElementHandler' ,
301
345
'CharacterDataHandler' ]
@@ -306,6 +350,8 @@ def parse_gifti_file(fname, buffer_size = 35000000):
306
350
parser .ParseFile (datasource )
307
351
except ExpatError :
308
352
print 'An expat error occured while parsing the Gifti file.'
353
+ # Reality check for pending data
354
+ assert out .pending_data is False
309
355
# update filename
310
356
out .img .filename = fname
311
357
return out .img
0 commit comments