Skip to content

Commit 5ee2c7c

Browse files
author
Ben Cipollini
committed
BF: Fix #362 using buffered gzip read.
1 parent cf99743 commit 5ee2c7c

File tree

1 file changed

+33
-2
lines changed

1 file changed

+33
-2
lines changed

nibabel/openers.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,42 @@
1717
# Value assigned to a gzip file's ``max_read_chunk`` attribute in this module.
GZIP_MAX_READ_CHUNK = 100 * 1024 * 1024  # 100 MiB (in bytes)
1818

1919

20+
class BufferedGzipFile(gzip.GzipFile):
    """GzipFile able to ``readinto`` buffers of 2**32 bytes or more.

    Large reads are split into several smaller reads on the parent class.
    This works around a known issue in Python 3.5; see
    https://bugs.python.org/issue25626

    Parameters
    ----------
    fileish : str
        Forwarded to ``gzip.GzipFile`` as its first positional argument.
    mode : str, optional
        Forwarded to ``gzip.GzipFile``.
    compresslevel : int, optional
        Forwarded to ``gzip.GzipFile``.
    buffer_size : int, optional
        Largest number of bytes requested from the parent class in a single
        read.  When used, the value is clamped to ``[1, 2**32 - 1]``.
    """

    # Max for unsigned 32-bit integer; a single read must never exceed this.
    _MAX_SINGLE_READ = 2 ** 32 - 1

    def __init__(self, fileish, mode='rb', compresslevel=9,
                 buffer_size=2**32-1):
        super(BufferedGzipFile, self).__init__(fileish, mode=mode,
                                               compresslevel=compresslevel)
        # Speedup for #209.  Only touch the attribute where the parent class
        # actually provides it (it is absent in Python 3.5).
        if hasattr(self, 'max_read_chunk'):
            self.max_read_chunk = GZIP_MAX_READ_CHUNK
        self.buffer_size = buffer_size

    def readinto(self, buf):
        """Fill `buf`, reading at most ``self.buffer_size`` bytes at a time.

        Parameters
        ----------
        buf : writable buffer
            Destination buffer; ``len(buf)`` bytes are requested in total.

        Returns
        -------
        n_read : int
            Number of bytes stored in `buf`.  This is smaller than
            ``len(buf)`` when the parent class returns a short read.
        """
        try:
            mv = memoryview(buf)
        except NameError:  # Python 2.6 or old 2.7: memoryview does not exist.
            # Without memoryview we cannot slice the buffer without copying,
            # so hand the whole buffer to the parent in one call.
            return super(BufferedGzipFile, self).readinto(buf)

        n_bytes = len(buf)
        # Never ask for more than an unsigned 32-bit count in one call, and
        # never for fewer than one byte (which would make no progress).
        max_read = max(1, min(self.buffer_size, self._MAX_SINGLE_READ))
        n_read = 0
        while n_read < n_bytes:
            n_wanted = min(n_bytes - n_read, max_read)
            n_got = super(BufferedGzipFile, self).readinto(
                mv[n_read:n_read + n_wanted])
            n_read += n_got
            if n_got != n_wanted:
                # Short read: nothing more to fetch right now.
                break
        return n_read
49+
50+
2051
def _gzip_open(fileish, *args, **kwargs):
    """Return a ``BufferedGzipFile`` for `fileish`.

    Any extra positional and keyword arguments are forwarded unchanged to
    the ``BufferedGzipFile`` constructor.
    """
    # open gzip files with faster reads on large files using larger chunks
    # See https://github.com/nipy/nibabel/pull/210 for discussion
    return BufferedGzipFile(fileish, *args, **kwargs)
2657

2758

0 commit comments

Comments
 (0)