Skip to content

Commit 1ae56eb

Browse files
committed
Nearly working implementation
1 parent 8686dee commit 1ae56eb

File tree

2 files changed

+66
-11
lines changed

2 files changed

+66
-11
lines changed

src/isal/igzip.py

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
_COMPRESS_LEVEL_BEST = isal_zlib.ISAL_BEST_COMPRESSION
3939
_BLOCK_SIZE = 64*1024
4040

41+
BUFFER_SIZE = _compression.BUFFER_SIZE
4142

4243
# The open method was copied from the python source with minor adjustments.
4344
def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_TRADEOFF,
@@ -146,9 +147,57 @@ def write(self, data):
146147
# to do so in pure python.
147148
class _IGzipReader(_compression.DecompressReader):
    """Incrementally decompress gzip data from a file object with isal_zlib.

    The raw file object is wrapped in ``gzip._PaddedFile`` so that input
    bytes a decompressor did not use can be pushed back onto the stream and
    read again on the next iteration.
    """

    def __init__(self, fp):
        """Set up decompression of the binary file object *fp*."""
        super().__init__(gzip._PaddedFile(fp), isal_zlib.decompressobj,
                         trailing_error=isal_zlib.IsalError,
                         wbits=16 + isal_zlib.MAX_WBITS)

    # Created by mixing and matching gzip._GzipReader and
    # _compression.DecompressReader
    def read(self, size=-1):
        """Return up to *size* decompressed bytes.

        A negative *size* reads everything via ``readall()``. ``b""`` is
        returned for ``size == 0`` and once the end of the input has been
        reached.

        Raises:
            EOFError: the input ran out before the current compressed
                stream reached its end-of-stream marker.
        """
        if size < 0:
            return self.readall()
        # size=0 is special because decompress(max_length=0) is not supported.
        # Once the end of input (or trailing garbage) has been seen, never
        # feed the stale decompressor again.
        if not size or self._eof:
            return b""

        # For certain input data, a single call to decompress() may not
        # return any data. In this case, retry until we get some data or
        # reach EOF.
        uncompress = b""
        while True:
            if self._decompressor.eof:
                # Input left over from the finished stream was already
                # pushed back onto self._fp below, so it must be read from
                # there only. Also taking it from unused_data would
                # decompress the same bytes twice.
                buf = self._fp.read(BUFFER_SIZE)
                if not buf:
                    break
                # Continue to next stream.
                self._decompressor = self._decomp_factory(
                    **self._decomp_args)
                try:
                    uncompress = self._decompressor.decompress(buf, size)
                except self._trailing_error:
                    # Trailing data isn't a valid compressed stream;
                    # ignore it.
                    break
            else:
                # Read a chunk of data from the file
                buf = self._fp.read(BUFFER_SIZE)
                uncompress = self._decompressor.decompress(buf, size)

            # Push back whatever input this call did not use: either the
            # tail held back by the output limit, or the bytes that follow
            # the end of the current stream.
            leftover = (self._decompressor.unconsumed_tail
                        or self._decompressor.unused_data)
            if leftover:
                self._fp.prepend(leftover)

            if uncompress:
                break
            if not buf:
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")

        if not uncompress:
            # Record the end of input the same way the parent class does,
            # so later reads return b"" and seeking from the end works.
            self._eof = True
            self._size = self._pos
            return b""
        self._pos += len(uncompress)
        return uncompress
200+
152201

153202
# Plagiarized from gzip.py from python's stdlib.
154203
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
@@ -162,12 +211,19 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
162211
return buf.getvalue()
163212

164213

214+
# Disabled one-shot variant, kept for reference. Unlike stdlib, it does not use the roundabout way of doing this via a file.
215+
# def decompress(data):
216+
# """Decompress a gzip compressed string in one shot.
217+
# Return the decompressed string.
218+
# """
219+
# return isal_zlib.decompress(data, wbits=16 + isal_zlib.MAX_WBITS)
220+
165221
def decompress(data):
    """Decompress a gzip compressed string in one shot.

    The bytes in *data* are wrapped in an in-memory file and read back
    through IGzipFile. Return the decompressed string.
    """
    raw = io.BytesIO(data)
    with IGzipFile(fileobj=raw) as reader:
        return reader.read()
171227

172228
def main():
173229
parser = argparse.ArgumentParser()

src/isal/isal_zlib.pyx

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -352,9 +352,8 @@ cdef class Compress:
352352

353353
cdef class Decompress:
354354
cdef public bytes unused_data
355-
cdef public unconsumed_tail
355+
cdef public bytes unconsumed_tail
356356
cdef public bint eof
357-
cdef public bint needs_input
358357
cdef bint is_initialised
359358
cdef inflate_state stream
360359
cdef unsigned char * obuf
@@ -381,7 +380,6 @@ cdef class Decompress:
381380
self.unconsumed_tail = b""
382381
self.eof = 0
383382
self.is_initialised = 1
384-
self.needs_input = 1
385383

386384
def __dealloc__(self):
387385
if self.obuf is not NULL:
@@ -420,8 +418,7 @@ cdef class Decompress:
420418
# This loop reads all the input bytes. If there are no input bytes
421419
# anymore the output is written.
422420
while (self.stream.avail_out == 0
423-
or self.stream.avail_in != 0
424-
or self.stream.block_state != ISAL_BLOCK_FINISH):
421+
or self.stream.avail_in != 0):
425422
self.stream.next_out = self.obuf # Reset output buffer.
426423
if total_bytes >= max_length:
427424
break
@@ -458,16 +455,18 @@ cdef class Decompress:
458455
# 1. Output limit was reached. Save leftover input in unconsumed_tail.
459456
# 2. All input data was consumed. Clear unconsumed_tail.
460457
unused_bytes = self.stream.avail_in
461-
self.unconsumed_tail = data[-unused_bytes:]
462-
self.needs_input = 0 if unused_bytes > 0 else 1
458+
if unused_bytes == 0:
459+
self.unconsumed_tail = b""
460+
else:
461+
self.unconsumed_tail = data[-unused_bytes:]
463462
return b"".join(out)
464463

465464
def flush(self, Py_ssize_t length = DEF_BUF_SIZE):
466465
if length <= 0:
467466
raise ValueError("Length must be greater than 0")
468467
if length > UINT32_MAX:
469468
raise ValueError("Length should not be larger than 4GB.")
470-
data = self.unconsumed_tail
469+
data = self.unconsumed_tail[:]
471470
cdef Py_ssize_t ibuflen = len(data)
472471
if ibuflen > UINT32_MAX:
473472
# This should never happen, because we check the input size in

0 commit comments

Comments
 (0)