Skip to content

Commit 9ea53a2

Browse files
committed
Use buffer protocol support
1 parent f57fecb commit 9ea53a2

File tree

2 files changed

+111
-73
lines changed

2 files changed

+111
-73
lines changed

src/isal/isal_zlib.pyx

Lines changed: 101 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ from .crc cimport crc32_gzip_refl
2828
from .igzip_lib cimport *
2929
from libc.stdint cimport UINT64_MAX, UINT32_MAX, uint32_t
3030
from cpython.mem cimport PyMem_Malloc, PyMem_Free
31+
from cpython.buffer cimport PyBUF_READ, PyBUF_C_CONTIGUOUS, PyObject_GetBuffer, \
32+
PyBuffer_Release
33+
34+
3135

3236
cdef extern from "<Python.h>":
3337
const Py_ssize_t PY_SSIZE_T_MAX
@@ -80,16 +84,28 @@ if ISAL_DEF_MAX_HIST_BITS > zlib.MAX_WBITS:
8084

8185

8286
cpdef adler32(data, unsigned long value = 1):
83-
cdef Py_ssize_t length = len(data)
84-
if length > UINT64_MAX:
85-
raise ValueError("Data too big for adler32")
86-
return isal_adler32(value, data, length)
87+
cdef Py_buffer buffer_data
88+
cdef Py_buffer* buffer = &buffer_data
89+
if PyObject_GetBuffer(data, buffer, PyBUF_READ & PyBUF_C_CONTIGUOUS) != 0:
90+
raise TypeError("Failed to get buffer")
91+
try:
92+
if buffer.len > UINT64_MAX:
93+
raise ValueError("Data too big for adler32")
94+
return isal_adler32(value, <unsigned char*>buffer.buf, buffer.len)
95+
finally:
96+
PyBuffer_Release(buffer)
8797

8898
cpdef crc32(data, unsigned long value = 0):
89-
cdef Py_ssize_t length = len(data)
90-
if length > UINT64_MAX:
91-
raise ValueError("Data too big for crc32")
92-
return crc32_gzip_refl(value, data, length)
99+
cdef Py_buffer buffer_data
100+
cdef Py_buffer* buffer = &buffer_data
101+
if PyObject_GetBuffer(data, buffer, PyBUF_READ & PyBUF_C_CONTIGUOUS) != 0:
102+
raise TypeError("Failed to get buffer")
103+
try:
104+
if buffer.len > UINT64_MAX:
105+
raise ValueError("Data too big for adler32")
106+
return crc32_gzip_refl(value, <unsigned char*>buffer.buf, buffer.len)
107+
finally:
108+
PyBuffer_Release(buffer)
93109

94110
cdef Py_ssize_t Py_ssize_t_min(Py_ssize_t a, Py_ssize_t b):
95111
if a <= b:
@@ -135,8 +151,12 @@ def compress(data,
135151
out = []
136152

137153
# initialise input
138-
cdef Py_ssize_t ibuflen = len(data)
139-
cdef unsigned char * ibuf = data
154+
cdef Py_buffer buffer_data
155+
cdef Py_buffer* buffer = &buffer_data
156+
if PyObject_GetBuffer(data, buffer, PyBUF_READ & PyBUF_C_CONTIGUOUS) != 0:
157+
raise TypeError("Failed to get buffer")
158+
cdef Py_ssize_t ibuflen = buffer.len
159+
cdef unsigned char * ibuf = <unsigned char*>buffer.buf
140160
stream.next_in = ibuf
141161

142162
# initialise helper variables
@@ -175,6 +195,7 @@ def compress(data,
175195
break
176196
return b"".join(out)
177197
finally:
198+
PyBuffer_Release(buffer)
178199
PyMem_Free(level_buf)
179200
PyMem_Free(obuf)
180201

@@ -193,8 +214,12 @@ cpdef decompress(data,
193214
&stream.crc_flag)
194215

195216
# initialise input
196-
cdef Py_ssize_t ibuflen = len(data)
197-
cdef unsigned char * ibuf = data
217+
cdef Py_buffer buffer_data
218+
cdef Py_buffer* buffer = &buffer_data
219+
if PyObject_GetBuffer(data, buffer, PyBUF_READ & PyBUF_C_CONTIGUOUS) != 0:
220+
raise TypeError("Failed to get buffer")
221+
cdef Py_ssize_t ibuflen = buffer.len
222+
cdef unsigned char * ibuf = <unsigned char*>buffer.buf
198223
stream.next_in = ibuf
199224

200225
# Initialise output buffer
@@ -234,6 +259,7 @@ cpdef decompress(data,
234259
raise IsalError("incomplete or truncated stream")
235260
return b"".join(out)
236261
finally:
262+
PyBuffer_Release(buffer)
237263
PyMem_Free(obuf)
238264

239265

@@ -302,36 +328,38 @@ cdef class Compress:
302328
out = []
303329

304330
# initialise input
305-
cdef Py_ssize_t total_length = len(data)
306-
if total_length > UINT32_MAX:
307-
# Zlib allows a maximum of 64 KB (16-bit length) and python has
308-
# integrated workarounds in order to compress up to 64 bits
309-
# lengths. This comes at a cost however. Considering 4 GB should
310-
# be ample for streaming applications, the workaround is not
311-
# implemented here. (It is in the stand-alone compress function).
312-
raise OverflowError("A maximum of 4 GB is allowed.")
313-
self.stream.next_in = data
314-
self.stream.avail_in = total_length
331+
cdef Py_buffer buffer_data
332+
cdef Py_buffer* buffer = &buffer_data
333+
if PyObject_GetBuffer(data, buffer, PyBUF_READ & PyBUF_C_CONTIGUOUS) != 0:
334+
raise TypeError("Failed to get buffer")
335+
cdef Py_ssize_t ibuflen = buffer.len
336+
cdef unsigned char * ibuf = <unsigned char*>buffer.buf
337+
self.stream.next_in = ibuf
315338

316339
# initialise helper variables
317340
cdef int err
318-
319-
# This loop reads all the input bytes. If there are no input bytes
320-
# anymore the output is written.
321-
while self.stream.avail_in != 0:
322-
self.stream.next_out = self.obuf # Reset output buffer.
323-
self.stream.avail_out = self.obuflen
324-
err = isal_deflate(&self.stream)
325-
if err != COMP_OK:
326-
# There is some python interacting when possible exceptions
327-
# Are raised. So we remain in pure C code if we check for
328-
# COMP_OK first.
329-
check_isal_deflate_rc(err)
330-
# Instead of output buffer resizing as the zlibmodule.c example
331-
# the data is appended to a list.
332-
# TODO: Improve this with the buffer protocol.
333-
out.append(self.obuf[:self.obuflen - self.stream.avail_out])
334-
return b"".join(out)
341+
try:
342+
while ibuflen !=0:
343+
# This loop runs n times (at least twice). n-1 times to fill the input
344+
# buffer with data. The nth time the input is empty. In that case
345+
# stream.flush is set to FULL_FLUSH and the end_of_stream is activated.
346+
arrange_input_buffer(&self.stream, &ibuflen)
347+
while self.stream.avail_in != 0:
348+
self.stream.next_out = self.obuf # Reset output buffer.
349+
self.stream.avail_out = self.obuflen
350+
err = isal_deflate(&self.stream)
351+
if err != COMP_OK:
352+
# Raising a possible exception requires interaction with Python,
353+
# so we check for COMP_OK first in order to remain in pure C code
354+
# when no error occurred.
355+
check_isal_deflate_rc(err)
356+
# Instead of resizing the output buffer as in the zlibmodule.c example,
357+
# the data is appended to a list.
358+
# TODO: Improve this with the buffer protocol.
359+
out.append(self.obuf[:self.obuflen - self.stream.avail_out])
360+
return b"".join(out)
361+
finally:
362+
PyBuffer_Release(buffer)
335363

336364
def flush(self, int mode=FULL_FLUSH):
337365
# Initialise stream
@@ -419,16 +447,14 @@ cdef class Decompress:
419447
elif max_length < 0:
420448
raise ValueError("max_length can not be smaller than 0")
421449

422-
cdef Py_ssize_t total_length = len(data)
423-
if total_length > UINT32_MAX:
424-
# Zlib allows a maximum of 64 KB (16-bit length) and python has
425-
# integrated workarounds in order to compress up to 64 bits
426-
# lengths. This comes at a cost however. Considering 4 GB should
427-
# be ample for streaming applications, the workaround is not
428-
# implemented here. (It is in the stand-alone compress function).
429-
raise OverflowError("A maximum of 4 GB is allowed.")
430-
self.stream.next_in = data
431-
self.stream.avail_in = total_length
450+
# initialise input
451+
cdef Py_buffer buffer_data
452+
cdef Py_buffer* buffer = &buffer_data
453+
if PyObject_GetBuffer(data, buffer, PyBUF_READ & PyBUF_C_CONTIGUOUS) != 0:
454+
raise TypeError("Failed to get buffer")
455+
cdef Py_ssize_t ibuflen = buffer.len
456+
cdef unsigned char * ibuf = <unsigned char*>buffer.buf
457+
self.stream.next_in = ibuf
432458
self.stream.avail_out = 0
433459
cdef unsigned long prev_avail_out
434460
cdef unsigned long bytes_written
@@ -443,34 +469,36 @@ cdef class Decompress:
443469
try:
444470
# This loop reads all the input bytes. If there are no input bytes
445471
# anymore the output is written.
446-
while (self.stream.avail_out == 0
447-
or self.stream.avail_in != 0):
448-
self.stream.next_out = obuf # Reset output buffer.
449-
if total_bytes >= max_length:
450-
break
451-
elif total_bytes + self.obuflen >= max_length:
452-
self.stream.avail_out = max_length - total_bytes
453-
# The inflate process may not fill all available bytes so
454-
# we make sure this is the last round.
455-
last_round = 1
456-
else:
457-
self.stream.avail_out = self.obuflen
458-
prev_avail_out = self.stream.avail_out
459-
err = isal_inflate(&self.stream)
460-
if err != ISAL_DECOMP_OK:
461-
# There is some python interacting when possible exceptions
462-
# Are raised. So we remain in pure C code if we check for
463-
# COMP_OK first.
464-
check_isal_inflate_rc(err)
465-
bytes_written = prev_avail_out - self.stream.avail_out
466-
total_bytes += bytes_written
467-
out.append(obuf[:bytes_written])
468-
if self.stream.block_state == ISAL_BLOCK_FINISH or last_round:
469-
break
472+
while self.stream.block_state != ISAL_BLOCK_FINISH and ibuflen !=0 and not last_round:
473+
arrange_input_buffer(&self.stream, &ibuflen)
474+
while (self.stream.avail_out == 0 or self.stream.avail_in != 0):
475+
self.stream.next_out = obuf # Reset output buffer.
476+
if total_bytes >= max_length:
477+
break
478+
elif total_bytes + self.obuflen >= max_length:
479+
self.stream.avail_out = max_length - total_bytes
480+
# The inflate process may not fill all available bytes so
481+
# we make sure this is the last round.
482+
last_round = 1
483+
else:
484+
self.stream.avail_out = self.obuflen
485+
prev_avail_out = self.stream.avail_out
486+
err = isal_inflate(&self.stream)
487+
if err != ISAL_DECOMP_OK:
488+
# Raising a possible exception requires interaction with Python,
489+
# so we check for ISAL_DECOMP_OK first in order to remain in pure C
490+
# code when no error occurred.
491+
check_isal_inflate_rc(err)
492+
bytes_written = prev_avail_out - self.stream.avail_out
493+
total_bytes += bytes_written
494+
out.append(obuf[:bytes_written])
495+
if self.stream.block_state == ISAL_BLOCK_FINISH or last_round:
496+
break
470497
self.save_unconsumed_input(data)
471498
return b"".join(out)
472499
finally:
473500
PyMem_Free(obuf)
501+
PyBuffer_Release(buffer)
474502

475503
def flush(self, Py_ssize_t length = DEF_BUF_SIZE):
476504
if length <= 0:

tox.ini

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,16 @@ commands =
1111
coverage html
1212
coverage xml
1313

14+
[testenv:compliance]
15+
deps=pytest
16+
commands=
17+
pytest tests/test_zlib_compliance.py tests/test_gzip_compliance.py
18+
19+
[testenv:compatibility]
20+
deps=pytest
21+
commands=
22+
pytest tests/test_isal.py
23+
1424
[testenv:lint]
1525
deps=flake8
1626
flake8-import-order

0 commit comments

Comments
 (0)