 GZIP_MAX_READ_CHUNK = 100 * 1024 * 1024  # 100MiB


+class BufferedGzipFile(gzip.GzipFile):
+    """GzipFile able to ``readinto`` buffers >= 2**32 bytes."""
+    # Speedup for #209; the max_read_chunk attribute is gone in Python 3.5
+    def __init__(self, fileish, mode='rb', compresslevel=9,
+                 buffer_size=2**32 - 1):
+        super(BufferedGzipFile, self).__init__(fileish, mode=mode,
+                                               compresslevel=compresslevel)
+        if hasattr(self, 'max_read_chunk'):
+            self.max_read_chunk = GZIP_MAX_READ_CHUNK
+        self.buffer_size = buffer_size
+
+    def readinto(self, buf):
+        """Uses self.buffer_size to do a buffered read."""
+        n_bytes = len(buf)
+        try:
+            # Read in chunks no larger than buffer_size bytes.
+            # This works around a known issue in Python 3.5.
+            # See https://bugs.python.org/issue25626
+            mv = memoryview(buf)
+            n_read = 0
+            max_read = self.buffer_size  # default 2**32 - 1, max for unsigned 32-bit int
+            while n_read < n_bytes:
+                n_wanted = min(n_bytes - n_read, max_read)
+                n_got = super(BufferedGzipFile, self).readinto(
+                    mv[n_read:n_read + n_wanted])
+                n_read += n_got
+                if n_got != n_wanted:  # short read means end of stream
+                    break
+        except NameError:  # Python 2.6: memoryview does not exist
+            n_read = super(BufferedGzipFile, self).readinto(buf)
+        return n_read
+
+
 def _gzip_open(fileish, *args, **kwargs):
     # open gzip files with faster reads on large files using larger chunks
     # See https://github.com/nipy/nibabel/pull/210 for discussion
-    gzip_file = gzip.open(fileish, *args, **kwargs)
-    gzip_file.max_read_chunk = GZIP_MAX_READ_CHUNK
+    gzip_file = BufferedGzipFile(fileish, *args, **kwargs)
     return gzip_file
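
For illustration only (not part of the diff above): a minimal, runnable sketch of the same chunked-readinto pattern, shrunk down so the loop runs several times on a tiny in-memory stream. The ChunkedReader class, the 8-byte buffer_size, and the round-trip payload are hypothetical choices for this demo; the real BufferedGzipFile uses the 2**32 - 1 limit shown above.

import gzip
import io

class ChunkedReader(gzip.GzipFile):
    """Toy analogue of BufferedGzipFile with a tiny chunk size."""
    buffer_size = 8  # absurdly small, to force multiple readinto calls

    def readinto(self, buf):
        mv = memoryview(buf)
        n_read = 0
        while n_read < len(buf):
            n_wanted = min(len(buf) - n_read, self.buffer_size)
            # Base-class readinto fills a writable slice of the buffer
            n_got = super(ChunkedReader, self).readinto(
                mv[n_read:n_read + n_wanted])
            n_read += n_got
            if n_got != n_wanted:  # short read means end of stream
                break
        return n_read

# Round-trip a payload through gzip in memory, then read it back in chunks.
payload = b'0123456789' * 5
raw = io.BytesIO()
with gzip.GzipFile(fileobj=raw, mode='wb') as f:
    f.write(payload)
raw.seek(0)

buf = bytearray(len(payload))
with ChunkedReader(fileobj=raw, mode='rb') as f:
    assert f.readinto(buf) == len(payload)
assert bytes(buf) == payload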