Skip to content

Commit 994d40b

Browse files
committed
Work on uncompressed tail
1 parent 72d5c97 commit 994d40b

File tree

3 files changed

+31
-15
lines changed

3 files changed

+31
-15
lines changed

src/isal/igzip.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,10 @@ def __init__(self, filename=None, mode=None,
103103
super().__init__(filename, mode, compresslevel, fileobj, mtime)
104104
if hasattr(self, "compress"):
105105
self.compress = isal_zlib.compressobj(compresslevel,
106-
isal_zlib.DEFLATED,
107-
-isal_zlib.MAX_WBITS,
108-
isal_zlib.DEF_MEM_LEVEL,
109-
0)
106+
isal_zlib.DEFLATED,
107+
-isal_zlib.MAX_WBITS,
108+
isal_zlib.DEF_MEM_LEVEL,
109+
0)
110110
if self.mode == gzip.READ:
111111
raw = _IGzipReader(self.fileobj)
112112
self._buffer = io.BufferedReader(raw)

src/isal/isal_zlib.pyx

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -408,16 +408,17 @@ cdef class Decompress:
408408
self.stream.avail_in = total_length
409409
self.stream.avail_out = 0
410410
cdef unsigned long prev_avail_out
411+
cdef unsigned long bytes_written
411412
cdef int err
412413
# This loop reads all the input bytes. If there are no input bytes
413414
# anymore the output is written.
414415
while (self.stream.avail_out == 0
415416
or self.stream.avail_in != 0
416417
or self.stream.block_state != ISAL_BLOCK_FINISH):
417418
self.stream.next_out = self.obuf # Reset output buffer.
418-
if total_bytes > max_length:
419+
if total_bytes >= max_length:
419420
break
420-
elif total_bytes + self.obuflen > max_length:
421+
elif total_bytes + self.obuflen >= max_length:
421422
self.stream.avail_out = max_length - total_bytes
422423
else:
423424
self.stream.avail_out = self.obuflen
@@ -428,8 +429,9 @@ cdef class Decompress:
428429
# Are raised. So we remain in pure C code if we check for
429430
# COMP_OK first.
430431
check_isal_inflate_rc(err)
431-
total_bytes += self.stream.avail_out
432-
out.append(self.obuf[:prev_avail_out - self.stream.avail_out])
432+
bytes_written = prev_avail_out - self.stream.avail_out
433+
total_bytes += bytes_written
434+
out.append(self.obuf[:bytes_written])
433435
if self.stream.block_state == ISAL_BLOCK_FINISH:
434436
break
435437
# Save unconsumed input implementation from zlibmodule.c
@@ -438,7 +440,7 @@ cdef class Decompress:
438440
# leftover input data in self->unused_data.
439441
self.eof = 1
440442
if self.stream.avail_in > 0:
441-
self.unused_data = self.stream.next_in[:]
443+
self.unused_data = data[total_bytes:]
442444
self.stream.avail_in = 0
443445
if self.stream.avail_in > 0 or self.unconsumed_tail:
444446
# This code handles two distinct cases:
@@ -452,14 +454,16 @@ cdef class Decompress:
452454
raise ValueError("Length must be greater than 0")
453455
if length > UINT32_MAX:
454456
raise ValueError("Length should not be larger than 4GB.")
455-
cdef Py_ssize_t ibuflen = len(self.unconsumed_tail)
457+
data = self.unconsumed_tail
458+
cdef Py_ssize_t ibuflen = len(data)
456459
if ibuflen > UINT32_MAX:
457460
# This should never happen, because we check the input size in
458461
# the decompress function as well.
459462
raise IsalError("Unconsumed tail too large. Can not flush.")
460-
self.stream.next_in = self.unconsumed_tail
463+
self.stream.next_in = data
461464
self.stream.avail_in = ibuflen
462-
465+
cdef unsigned long total_bytes = 0
466+
cdef unsigned long bytes_written
463467
out = []
464468
cdef unsigned long obuflen = length
465469
cdef unsigned char * obuf = <unsigned char *>PyMem_Malloc(obuflen * sizeof(char))
@@ -478,20 +482,22 @@ cdef class Decompress:
478482
# Instead of output buffer resizing as the zlibmodule.c example
479483
# the data is appended to a list.
480484
# TODO: Improve this with the buffer protocol.
481-
out.append(obuf[:obuflen - self.stream.avail_out])
485+
bytes_written = obuflen - self.stream.avail_out
486+
total_bytes += bytes_written
487+
out.append(obuf[:bytes_written])
482488
if self.stream.block_state == ISAL_BLOCK_FINISH:
483489
# The end of the compressed data has been reached. Store the
484490
# leftover input data in self->unused_data.
485491
self.eof = 1
486492
self.is_initialised = 0
487493
if self.stream.avail_in > 0:
488-
self.unused_data = self.stream.next_in[:]
494+
self.unused_data = data[total_bytes:]
489495
self.stream.avail_in = 0
490496
if self.stream.avail_in > 0 or self.unconsumed_tail:
491497
# This code handles two distinct cases:
492498
# 1. Output limit was reached. Save leftover input in unconsumed_tail.
493499
# 2. All input data was consumed. Clear unconsumed_tail.
494-
self.unconsumed_tail = self.stream.next_in[:]
500+
self.unconsumed_tail = data[total_bytes:]
495501
return b"".join(out)
496502
finally:
497503
PyMem_Free(obuf)

tests/test_isal.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,16 @@ def test_decompress_decompressobj(data_size, level, wbits, memLevel):
119119
decompressobj = isal_zlib.decompressobj(wbits=wbits)
120120
decompressed = decompressobj.decompress(compressed) + decompressobj.flush()
121121
assert data == decompressed
122+
assert decompressobj.unused_data == b""
123+
assert decompressobj.unconsumed_tail == b""
124+
125+
126+
def test_decompressobj_unconsumed_tail():
127+
data = DATA[:128*1024]
128+
compressed = zlib.compress(data)
129+
decompressobj = isal_zlib.decompressobj()
130+
output = decompressobj.decompress(compressed, 2048)
131+
assert len(output) == 2048
122132

123133

124134
@pytest.mark.parametrize(["data_size", "level"],

0 commit comments

Comments
 (0)