Skip to content

Commit 52949ea

Browse files
committed
Simplify code and fix segfaults by using ISAL_GZIP_NO_HDR
1 parent 04ac8e7 commit 52949ea

File tree

2 files changed

+30
-52
lines changed

2 files changed

+30
-52
lines changed

src/isal/igzip.py

Lines changed: 23 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -150,59 +150,30 @@ def write(self, data):
150150
# side of things. It is much simpler. Gzip header interpretation and gzip
151151
# checksum checking are already implemented in the isa-l library. So no need
152152
# to do so in pure python.
153-
class _IGzipReader(_compression.DecompressReader):
153+
class _IGzipReader(gzip._GzipReader):
154154
def __init__(self, fp):
155-
super().__init__(gzip._PaddedFile(fp), isal_zlib.decompressobj,
156-
trailing_error=isal_zlib.IsalError,
157-
wbits=16 + isal_zlib.MAX_WBITS)
158-
159-
# Created by mixing and matching gzip._GzipReader and
160-
# _compression.DecompressReader
161-
def read(self, size=-1):
162-
if size < 0:
163-
return self.readall()
164-
# size=0 is special because decompress(max_length=0) is not supported
165-
if not size:
166-
return b""
167-
168-
# For certain input data, a single
169-
# call to decompress() may not return
170-
# any data. In this case, retry until we get some data or reach EOF.
171-
uncompress = b""
172-
while True:
173-
if self._decompressor.eof:
174-
buf = (self._decompressor.unused_data or
175-
self._fp.read(BUFFER_SIZE))
176-
if not buf:
177-
break
178-
# Continue to next stream.
179-
self._decompressor = self._decomp_factory(
180-
**self._decomp_args)
181-
try:
182-
uncompress = self._decompressor.decompress(buf, size)
183-
except self._trailing_error:
184-
# Trailing data isn't a valid compressed stream; ignore it.
185-
break
186-
else:
187-
# Read a chunk of data from the file
188-
buf = self._fp.read(BUFFER_SIZE)
189-
uncompress = self._decompressor.decompress(buf, size)
190-
if self._decompressor.unconsumed_tail != b"":
191-
self._fp.prepend(self._decompressor.unconsumed_tail)
192-
elif self._decompressor.unused_data != b"":
193-
# Prepend the already read bytes to the fileobj so they can
194-
# be seen by _read_eof() and _read_gzip_header()
195-
self._fp.prepend(self._decompressor.unused_data)
196-
197-
if uncompress != b"":
198-
break
199-
if buf == b"":
200-
raise EOFError("Compressed file ended before the "
201-
"end-of-stream marker was reached")
202-
203-
self._pos += len(uncompress)
204-
return uncompress
205-
155+
super().__init__(fp)
156+
self._decomp_factory = isal_zlib.decompressobj
157+
self._decomp_args = dict(wbits=64+isal_zlib.MAX_WBITS)
158+
self._decompressor = self._decomp_factory(**self._decomp_args)
159+
160+
def _add_read_data(self, data):
161+
self._crc = isal_zlib.crc32(data, self._crc)
162+
self._stream_size = self._stream_size + len(data)
163+
164+
def _read_eof(self):
165+
crc32 = self._decompressor.crc
166+
if crc32 != self._crc:
167+
raise BadGzipFile(
168+
f"CRC check failed {hex(crc32)} != {hex(self._crc)}")
169+
# Gzip files can be padded with zeroes and still be followed by more archives.
170+
# Consume all zero bytes and set the file position to the first
171+
# non-zero byte. See http://www.gzip.org/#faq8
172+
c = b"\x00"
173+
while c == b"\x00":
174+
c = self._fp.read(1)
175+
if c:
176+
self._fp.prepend(c)
206177

207178
# Plagiarized from gzip.py from python's stdlib.
208179
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):

src/isal/isal_zlib.pyx

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,10 @@ cdef class Decompress:
540540
finally:
541541
PyMem_Free(obuf)
542542

543+
@property
544+
def crc(self):
545+
return self.stream.crc
546+
543547
cdef wbits_to_flag_and_hist_bits_deflate(int wbits,
544548
unsigned short * hist_bits,
545549
unsigned short * gzip_flag):
@@ -571,6 +575,9 @@ cdef wbits_to_flag_and_hist_bits_inflate(int wbits,
571575
elif -15 <= wbits <= -8: # raw compressed stream
572576
hist_bits[0] = -wbits
573577
crc_flag[0] = ISAL_DEFLATE
578+
elif 72 <=wbits <= 79:
579+
hist_bits[0] = wbits - 64
580+
crc_flag[0] = ISAL_GZIP_NO_HDR
574581
else:
575582
raise ValueError("Invalid wbits value")
576583

0 commit comments

Comments
 (0)