Skip to content

Commit 4b5f794

Browse files
committed
Implement arrange output buffer for decompress object
1 parent ff2cb9a commit 4b5f794

File tree

1 file changed

+28
-50
lines changed

1 file changed

+28
-50
lines changed

src/isal/isal_zlib.pyx

Lines changed: 28 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ cdef arrange_output_buffer_with_maximum(stream_or_state *stream,
147147
occupied = stream.next_out - buffer[0]
148148
if length == occupied:
149149
if length == max_length:
150-
raise MemoryError("Buffer has reached maximum size")
150+
return -2
151151
if length <= max_length >> 1:
152152
new_length = length << 1
153153
else:
@@ -166,6 +166,8 @@ cdef arrange_output_buffer_with_maximum(stream_or_state *stream,
166166
cdef arrange_output_buffer(stream_or_state *stream, unsigned char **buffer, Py_ssize_t length):
167167
cdef Py_ssize_t ret
168168
ret = arrange_output_buffer_with_maximum(stream, buffer, length, PY_SSIZE_T_MAX)
169+
if ret == -2:
170+
raise MemoryError("Output buffer has reached maximum size")
169171
return ret
170172

171173
cdef void arrange_input_buffer(stream_or_state *stream, Py_ssize_t *remains):
@@ -511,8 +513,6 @@ cdef class Decompress:
511513
cdef public bint eof
512514
cdef bint is_initialised
513515
cdef inflate_state stream
514-
cdef unsigned char * obuf
515-
cdef unsigned int obuflen
516516
cdef bint method_set
517517

518518
def __cinit__(self, wbits=ISAL_DEF_MAX_HIST_BITS, zdict = None):
@@ -534,17 +534,11 @@ cdef class Decompress:
534534
err = isal_inflate_set_dict(&self.stream, zdict, zdict_length)
535535
if err != COMP_OK:
536536
check_isal_deflate_rc(err)
537-
self.obuflen = DEF_BUF_SIZE
538-
self.obuf = <unsigned char *>PyMem_Malloc(self.obuflen * sizeof(char))
539537
self.unused_data = b""
540538
self.unconsumed_tail = b""
541-
self.eof = 0
539+
self.eof = False
542540
self.is_initialised = 1
543541

544-
def __dealloc__(self):
545-
if self.obuf is not NULL:
546-
PyMem_Free(self.obuf)
547-
548542
def _view_bitbuffer(self):
549543
"""Shows the 64-bitbuffer of the internal inflate_state. It contains
550544
a maximum of 8 bytes. This data is already read-in so is not part
@@ -594,11 +588,13 @@ cdef class Decompress:
594588
unconsumed_tail attribute.
595589
"""
596590

597-
cdef Py_ssize_t total_bytes = 0
591+
cdef Py_ssize_t hard_limit
598592
if max_length == 0:
599-
max_length = PY_SSIZE_T_MAX
593+
hard_limit = PY_SSIZE_T_MAX
600594
elif max_length < 0:
601595
raise ValueError("max_length can not be smaller than 0")
596+
else:
597+
hard_limit = max_length
602598

603599
if not self.method_set:
604600
# Try to detect method from the first two bytes of the data.
@@ -613,50 +609,42 @@ cdef class Decompress:
613609
cdef Py_ssize_t ibuflen = buffer.len
614610
cdef unsigned char * ibuf = <unsigned char*>buffer.buf
615611
self.stream.next_in = ibuf
616-
self.stream.avail_out = 0
617-
cdef unsigned int prev_avail_out
618-
cdef unsigned int bytes_written
619-
cdef Py_ssize_t unused_bytes
612+
620613
cdef int err
614+
cdef bint max_length_reached = False
621615

622616
# Initialise output buffer
623-
out = []
617+
cdef unsigned char *obuf = NULL
618+
cdef Py_ssize_t obuflen = DEF_BUF_SIZE
619+
if obuflen > max_length:
620+
obuflen = max_length
624621

625-
cdef bint last_round = 0
626622
try:
627623
# This loop reads all the input bytes. Once no input bytes are left,
628624
# the output is written.
629625
while True:
630626
arrange_input_buffer(&self.stream, &ibuflen)
631-
while (self.stream.avail_out == 0 or self.stream.avail_in != 0):
632-
self.stream.next_out = self.obuf # Reset output buffer.
633-
if total_bytes >= max_length:
627+
while True:#(self.stream.avail_out == 0 or self.stream.avail_in != 0):
628+
obuflen = arrange_output_buffer_with_maximum(
629+
&self.stream, &obuf, obuflen, hard_limit)
630+
if obuflen == -2:
631+
max_length_reached = True
634632
break
635-
elif total_bytes + self.obuflen >= max_length:
636-
self.stream.avail_out = max_length - total_bytes
637-
# The inflate process may not fill all available bytes so
638-
# we make sure this is the last round.
639-
last_round = 1
640-
else:
641-
self.stream.avail_out = self.obuflen
642-
prev_avail_out = self.stream.avail_out
643633
err = isal_inflate(&self.stream)
644634
if err != ISAL_DECOMP_OK:
645635
# Raising an exception requires interaction with the Python runtime,
645635
# so we check for ISAL_DECOMP_OK first. That way the success path
646636
# remains in pure C code.
648638
check_isal_inflate_rc(err)
649-
bytes_written = prev_avail_out - self.stream.avail_out
650-
total_bytes += bytes_written
651-
out.append(self.obuf[:bytes_written])
652-
if self.stream.block_state == ISAL_BLOCK_FINISH or last_round:
639+
if self.stream.avail_out != 0:
653640
break
654-
if self.stream.block_state == ISAL_BLOCK_FINISH or ibuflen ==0:
641+
if self.stream.block_state == ISAL_BLOCK_FINISH or ibuflen ==0 or max_length_reached:
655642
break
656643
self.save_unconsumed_input(buffer)
657-
return b"".join(out)
644+
return PyBytes_FromStringAndSize(<char*>obuf, self.stream.next_out - obuf)
658645
finally:
659646
PyBuffer_Release(buffer)
647+
PyMem_Free(obuf)
660648

661649
def flush(self, Py_ssize_t length = DEF_BUF_SIZE):
662650
"""
@@ -667,8 +655,7 @@ cdef class Decompress:
667655
"""
668656
if length <= 0:
669657
raise ValueError("Length must be greater than 0")
670-
if length > UINT32_MAX:
671-
raise ValueError("Length should not be larger than 4GB.")
658+
672659
cdef Py_buffer buffer_data
673660
cdef Py_buffer* buffer = &buffer_data
674661
if PyObject_GetBuffer(self.unconsumed_tail, buffer, PyBUF_READ & PyBUF_C_CONTIGUOUS) != 0:
@@ -677,19 +664,14 @@ cdef class Decompress:
677664
cdef unsigned char * ibuf = <unsigned char*>buffer.buf
678665
self.stream.next_in = ibuf
679666

680-
cdef unsigned int total_bytes = 0
681-
cdef unsigned int bytes_written
682-
out = []
683667
cdef unsigned int obuflen = length
684-
cdef unsigned char * obuf = <unsigned char *>PyMem_Malloc(obuflen * sizeof(char))
685-
cdef Py_ssize_t unused_bytes
668+
cdef unsigned char * obuf = NULL
686669

687670
try:
688671
while True:
689672
arrange_input_buffer(&self.stream, &ibuflen)
690673
while True:
691-
self.stream.next_out = obuf # Reset output buffer.
692-
self.stream.avail_out = obuflen
674+
obuflen = arrange_output_buffer(&self.stream, &obuf, obuflen)
693675
err = isal_inflate(&self.stream)
694676
if err != ISAL_DECOMP_OK:
695677
# There is some python interacting when possible exceptions
@@ -699,18 +681,14 @@ cdef class Decompress:
699681
# Output is accumulated in a single buffer that arrange_output_buffer
700682
# grows as needed (as in the zlibmodule.c example); the data is no
701683
# longer appended to a list.
702-
if self.stream.avail_out == obuflen:
703-
break
704-
bytes_written = obuflen - self.stream.avail_out
705-
total_bytes += bytes_written
706-
out.append(obuf[:bytes_written])
707684
if self.stream.avail_out != 0:
708685
break
709686
if self.stream.block_state == ISAL_BLOCK_FINISH or ibuflen == 0:
710687
break
711688
self.save_unconsumed_input(buffer)
712-
return b"".join(out)
689+
return PyBytes_FromStringAndSize(<char*>obuf, self.stream.next_out - obuf)
713690
finally:
691+
PyBuffer_Release(buffer)
714692
PyMem_Free(obuf)
715693

716694

0 commit comments

Comments
 (0)