Skip to content

Commit 853508e

Browse files
author
Ben Cipollini
committed
BF: re-implement buffer size; makes parsing faster!
1 parent 51adb8a commit 853508e

File tree

2 files changed

+31
-37
lines changed

2 files changed

+31
-37
lines changed

nibabel/gifti/parse_gifti_fast.py

Lines changed: 3 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,9 @@ def read_data_block(encoding, endian, ordering, datatype, shape, data):
7272

7373
class GiftiImageParser(XmlImageParser):
7474

75-
def __init__(self):
76-
super(GiftiImageParser, self).__init__()
75+
def __init__(self, encoding=None, buffer_size=35000000):
76+
super(GiftiImageParser, self).__init__(encoding=encoding,
77+
buffer_size=buffer_size)
7778

7879
# finite state machine stack
7980
self.fsm_state = []
@@ -94,7 +95,6 @@ def __init__(self):
9495

9596
# Collecting char buffer fragments
9697
self._char_blocks = None
97-
self.buffer_size = None
9898

9999
def StartElementHandler(self, name, attrs):
100100
self.flush_chardata()
@@ -313,33 +313,6 @@ def pending_data(self):
313313
" True if there is character data pending for processing "
314314
return not self._char_blocks is None
315315

316-
def _create_parser(self):
317-
parser = super(GiftiImageParser, self)._create_parser()
318-
if self.buffer_size is not None:
319-
parser.buffer_text = True
320-
parser.buffer_size = self.buffer_size
321-
return parser
322-
323-
def parse(self, string=None, fname=None, fptr=None, buffer_size=None):
324-
""" Parse gifti file named `fname`, return image
325-
326-
Parameters
327-
----------
328-
fname : str
329-
filename of gifti file
330-
buffer_size: None or int, optional
331-
size of read buffer. None gives default of 35000000 unless on python <
332-
2.6, in which case it is read only in the parser. In that case values
333-
other than None cause a ValueError on execution
334-
335-
Returns
336-
-------
337-
img : gifti image
338-
"""
339-
self.buffer_size = buffer_size
340-
return super(GiftiImageParser, self).parse(string=string, fname=fname,
341-
fptr=fptr)
342-
343316

344317
class Outputter(GiftiImageParser):
345318
@np.deprecate_with_doc("Use GiftiImageParser instead.")

nibabel/xmlbasedimages.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,32 @@ def to_xml(self, enc='utf-8'):
2929

3030

3131
class XmlBasedHeader(FileBasedHeader, XmlSerializable):
32+
""" Basic wrapper around FileBasedHeader and XmlSerializable."""
3233
pass
3334

3435

3536
class XmlImageParser(object):
36-
""" Parse XML image"""
37+
""" Base class for defining how to parse xml-based images."""
3738

3839
HANDLER_NAMES = ['StartElementHandler',
3940
'EndElementHandler',
4041
'CharacterDataHandler']
4142

42-
def __init__(self, encoding=None):
43+
def __init__(self, encoding=None, buffer_size=35000000):
4344
self.encoding = encoding
45+
self.buffer_size = buffer_size
4446
self.img = None
4547

4648
def _create_parser(self):
47-
return ParserCreate() # from xml package
49+
"""Internal function that allows subclasses to mess
50+
with the underlying parser, if desired."""
4851

49-
def parse(self, string=None, fname=None, fptr=None):
52+
parser = ParserCreate(encoding=self.encoding) # from xml package
53+
parser.buffer_text = True
54+
parser.buffer_size = self.buffer_size
55+
return parser
56+
57+
def parse(self, string=None, fname=None, fptr=None, buffer_size=None):
5058
"""
5159
Parameters
5260
----------
@@ -59,6 +67,11 @@ def parse(self, string=None, fname=None, fptr=None):
5967
fptr : file pointer
6068
open file pointer to an xml document
6169
70+
buffer_size: None or int, optional
71+
size of read buffer. None gives the default of 35000000, except on Python <
72+
2.6, where the buffer size is read-only in the parser. In that case, values
73+
other than None cause a ValueError on execution.
74+
6275
Returns
6376
-------
6477
img : XmlBasedImage
@@ -76,6 +89,10 @@ def parse(self, string=None, fname=None, fptr=None):
7689
setattr(parser, name, getattr(self, name))
7790
parser.ParseFile(fptr)
7891

92+
if fname is not None:
93+
fptr.close()
94+
self.img.set_filename(fname)
95+
7996
return self.img
8097

8198
def StartElementHandler(self, name, attrs):
@@ -108,7 +125,7 @@ def to_file_map(self, file_map=None):
108125
f.write(self.to_xml())
109126

110127
@classmethod
111-
def from_file_map(klass, file_map):
128+
def from_file_map(klass, file_map, buffer_size=35000000):
112129
""" Load a Gifti image from a file_map
113130
114131
Parameters
@@ -119,7 +136,11 @@ def from_file_map(klass, file_map):
119136
img : GiftiImage
120137
Returns a GiftiImage
121138
"""
122-
img = klass.parser().parse(
139+
img = klass.parser(buffer_size=buffer_size).parse(
123140
fptr=file_map['image'].get_prepare_fileobj('rb'))
124-
img.set_filename(file_map['image'].filename)
125141
return img
142+
143+
@classmethod
144+
def from_filename(klass, filename, buffer_size=35000000):
145+
file_map = klass.filespec_to_file_map(filename)
146+
return klass.from_file_map(file_map, buffer_size=buffer_size)

0 commit comments

Comments
 (0)