Skip to content

Commit d445400

Browse files
committed
Faster decompress implementation
1 parent 4129bf4 commit d445400

File tree

1 file changed

+47
-2
lines changed

1 file changed

+47
-2
lines changed

src/isal/igzip.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@
2525
import gzip
2626
import io
2727
import os
28+
import struct
2829
import sys
30+
from typing import List
2931

3032
from . import isal_zlib
3133

@@ -35,6 +37,8 @@
3537
_COMPRESS_LEVEL_TRADEOFF = isal_zlib.ISAL_DEFAULT_COMPRESSION
3638
_COMPRESS_LEVEL_BEST = isal_zlib.ISAL_BEST_COMPRESSION
3739

40+
# Bit masks that _gzip_header_end tests against the flags byte of a gzip
# header to find out which optional header fields are present.
# NOTE(review): RFC 1952 names the 4 bit FEXTRA rather than FHEXTRA; the
# name is left as-is because other code in this module refers to it.
FTEXT, FHCRC, FHEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
41+
3842
try:
3943
BadGzipFile = gzip.BadGzipFile # type: ignore
4044
except AttributeError: # Versions lower than 3.8 do not have BadGzipFile
@@ -229,12 +233,53 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
229233
return buf.getvalue()
230234

231235

236+
def _gzip_header_end(data: bytes) -> int:
237+
if len(data) < 10:
238+
raise ValueError("Gzip header should be 10 bytes or more")
239+
magic, method, flags, mtime, xfl, os = struct.unpack("<HBBIBB", data[:10], )
240+
if magic != 0x8b1f:
241+
raise BadGzipFile(f"Not a gzipped file ({repr(data[:2])})")
242+
if method != 8:
243+
raise BadGzipFile("Unknown compression method")
244+
pos = 10
245+
if flags & FHEXTRA:
246+
xlen = struct.unpack("<H", data[pos: pos+2])
247+
pos += xlen
248+
if flags & FNAME:
249+
fname_end = data.index(b"\x00", pos) + 1
250+
pos = fname_end
251+
if flags & FCOMMENT:
252+
fcomment_end = data.index(b"\x00", pos) + 1
253+
pos = fcomment_end
254+
if flags & FHCRC:
255+
pos += 2
256+
return pos
257+
258+
232259
def decompress(data):
    """Decompress a gzip compressed string in one shot.
    Return the decompressed string.

    ``data`` may hold several gzip members back to back, optionally
    separated by NUL padding; the payloads of all members are
    concatenated. For every member the header is skipped, the payload is
    inflated, and the 8-byte trailer (CRC-32 and size) is verified.

    :raises EOFError: If a member ends before its end-of-stream marker or
        before its complete 8-byte trailer.
    :raises BadGzipFile: If the CRC or the size stored in the trailer does
        not match the decompressed data.
    """
    all_blocks: List[bytes] = []
    while data != b"":
        header_end = _gzip_header_end(data)
        # The header is skipped and the trailer is checked by hand below,
        # so the decompressor is created for a headerless stream
        # (presumably -15 follows zlib's negative-wbits convention).
        do = isal_zlib.decompressobj(-15)
        block = do.decompress(data[header_end:]) + do.flush()
        # Everything after the end of the deflate stream: this member's
        # trailer followed by any further members.
        trailer = do.unused_data
        if not do.eof or len(trailer) < 8:
            raise EOFError("Compressed file ended before the end-of-stream "
                           "marker was reached")
        checksum, length = struct.unpack("<II", trailer[:8])
        if isal_zlib.crc32(block) != checksum:
            raise BadGzipFile("CRC check failed")
        # The stored size is the uncompressed length modulo 2**32, so the
        # actual length must be masked before comparing; otherwise valid
        # members of 4 GiB or more would be rejected.
        if length != (len(block) & 0xFFFFFFFF):
            raise BadGzipFile("Incorrect length of data produced")
        all_blocks.append(block)
        # Remove all padding null bytes and start next block.
        data = trailer[8:].lstrip(b"\x00")
    return b"".join(all_blocks)
238283

239284

240285
def main():

0 commit comments

Comments
 (0)