Skip to content

Commit ecb03c6

Browse files
authored
Merge pull request #134 from jakirkham/use_buffered_BytesIO
Cutdown copies with BytesIO
2 parents bc61f5c + 5379023 commit ecb03c6

File tree

3 files changed

+40
-9
lines changed

3 files changed

+40
-9
lines changed

docs/release.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ Release notes
3030
* Add Python 3.7 (by :user:`John Kirkham <jakirkham>`; :issue:`92`).
3131

3232
* Add codec :class:`numcodecs.gzip.GZip` to replace ``gzip`` alias for ``zlib``,
33-
which was incorrect (by :user:`Jan Funke <funkey>`; :issue:`87`).
33+
which was incorrect (by :user:`Jan Funke <funkey>`; :issue:`87`; and :user:`John Kirkham <jakirkham>`; :issue:`134`).
3434

3535
* Corrects handling of ``NaT`` in ``datetime64`` and ``timedelta64`` in various
3636
compressors (by :user:`John Kirkham <jakirkham>`; :issue:`127`, :issue:`131`).

numcodecs/gzip.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
from .abc import Codec
8-
from .compat import ndarray_copy, ensure_contiguous_ndarray, PY2
8+
from .compat import ensure_bytes, ensure_ndarray, ensure_contiguous_ndarray, PY2
99

1010

1111
class GZip(Codec):
@@ -38,24 +38,39 @@ def encode(self, buf):
3838
mode='wb',
3939
compresslevel=self.level) as compressor:
4040
compressor.write(buf)
41-
compressed = compressed.getvalue()
4241

43-
return compressed
42+
try:
43+
compressed = compressed.getbuffer()
44+
except AttributeError: # pragma: py3 no cover
45+
compressed = compressed.getvalue()
46+
47+
return ensure_ndarray(compressed)
4448

4549
# noinspection PyMethodMayBeStatic
4650
def decode(self, buf, out=None):
4751

4852
# normalise inputs
49-
buf = ensure_contiguous_ndarray(buf)
50-
if out is not None:
51-
out = ensure_contiguous_ndarray(out)
53+
if PY2: # pragma: py3 no cover
54+
# On Python 2, BytesIO always copies.
55+
# Merely ensure the data supports the (new) buffer protocol.
56+
buf = ensure_contiguous_ndarray(buf)
57+
else: # pragma: py2 no cover
58+
# BytesIO only copies if the data is not of `bytes` type.
59+
# This allows `bytes` objects to pass through without copying.
60+
buf = ensure_bytes(buf)
5261

5362
# do decompression
5463
buf = io.BytesIO(buf)
5564
with _gzip.GzipFile(fileobj=buf, mode='rb') as decompressor:
56-
decompressed = decompressor.read()
65+
if out is not None:
66+
out_view = ensure_contiguous_ndarray(out)
67+
decompressor.readinto(out_view)
68+
if decompressor.read(1) != b'':
69+
raise ValueError("Unable to fit data into `out`")
70+
else:
71+
out = ensure_ndarray(decompressor.read())
5772

5873
# handle destination - if `out` was given, the data has already been
5974
# decompressed directly into it via `readinto`; otherwise `out` holds
6075
# the newly decompressed data
61-
return ndarray_copy(decompressed, out)
76+
return out

numcodecs/tests/test_gzip.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,19 @@ def test_err_encode_non_contiguous():
9393
for codec in codecs:
9494
with pytest.raises(ValueError):
9595
codec.encode(arr)
96+
97+
98+
def test_err_out_too_small():
99+
arr = np.arange(10, dtype='i4')
100+
out = np.empty_like(arr)[:-1]
101+
for codec in codecs:
102+
with pytest.raises(ValueError):
103+
codec.decode(codec.encode(arr), out)
104+
105+
106+
def test_out_too_large():
107+
out = np.empty((10,), dtype='i4')
108+
arr = out[:-1]
109+
arr[:] = 5
110+
for codec in codecs:
111+
codec.decode(codec.encode(arr), out)

0 commit comments

Comments
 (0)