Skip to content

Commit 3234f28

Browse files
authored
Merge pull request #103 from pycompression/py3.11_2
Make code compatible with python 3.11 onwards
2 parents 9fd81d7 + ba8dc20 commit 3234f28

File tree

2 files changed

+28
-22
lines changed

2 files changed

+28
-22
lines changed

src/isal/igzip.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import struct
2929
import sys
3030
import time
31-
from typing import List, Optional, SupportsInt
31+
from typing import Optional, SupportsInt
3232
import _compression # noqa: I201 # Not third-party
3333

3434
from . import igzip_lib, isal_zlib
@@ -49,7 +49,7 @@
4949
try:
5050
BadGzipFile = gzip.BadGzipFile # type: ignore
5151
except AttributeError: # Versions lower than 3.8 do not have BadGzipFile
52-
BadGzipFile = OSError
52+
BadGzipFile = OSError # type: ignore
5353

5454

5555
# The open method was copied from the CPython source with minor adjustments.
@@ -349,11 +349,13 @@ def _gzip_header_end(data: bytes) -> int:
349349
raise BadGzipFile(f"Not a gzipped file ({repr(data[:2])})")
350350
if method != 8:
351351
raise BadGzipFile("Unknown compression method")
352+
if not flags: # Likely when data compressed in memory
353+
return 10
352354
pos = 10
353355
if flags & FEXTRA:
354356
if len(data) < pos + 2:
355357
raise eof_error
356-
xlen = int.from_bytes(data[pos: pos + 2], "little", signed=False)
358+
xlen, = struct.unpack("<H", data[pos: pos+2])
357359
pos += 2 + xlen
358360
if flags & FNAME:
359361
pos = data.find(b"\x00", pos) + 1
@@ -367,12 +369,12 @@ def _gzip_header_end(data: bytes) -> int:
367369
if flags & FHCRC:
368370
if len(data) < pos + 2:
369371
raise eof_error
370-
header_crc = int.from_bytes(data[pos: pos + 2], "little", signed=False)
372+
header_crc, = struct.unpack("<H", data[pos: pos+2])
371373
# CRC is stored as a 16-bit integer by taking last bits of crc32.
372374
crc = isal_zlib.crc32(data[:pos]) & 0xFFFF
373375
if header_crc != crc:
374-
raise BadGzipFile(f"Corrupted header. Checksums do not "
375-
f"match: {crc} != {header_crc}")
376+
raise BadGzipFile(f"Corrupted gzip header. Checksums do not "
377+
f"match: {crc:04x} != {header_crc:04x}")
376378
pos += 2
377379
return pos
378380

@@ -381,26 +383,25 @@ def decompress(data):
381383
"""Decompress a gzip compressed string in one shot.
382384
Return the decompressed string.
383385
"""
384-
all_blocks: List[bytes] = []
386+
decompressed_members = []
385387
while True:
386-
if data == b"":
387-
break
388+
if not data: # Empty data returns empty bytestring
389+
return b"".join(decompressed_members)
388390
header_end = _gzip_header_end(data)
389-
do = isal_zlib.decompressobj(-15)
390-
block = do.decompress(data[header_end:]) + do.flush()
391+
# Use a zlib raw deflate decompressor
392+
do = isal_zlib.decompressobj(wbits=-isal_zlib.MAX_WBITS)
393+
# Read all the data except the header
394+
decompressed = do.decompress(data[header_end:])
391395
if not do.eof or len(do.unused_data) < 8:
392396
raise EOFError("Compressed file ended before the end-of-stream "
393397
"marker was reached")
394-
checksum, length = struct.unpack("<II", do.unused_data[:8])
395-
crc = isal_zlib.crc32(block)
396-
if crc != checksum:
398+
crc, length = struct.unpack("<II", do.unused_data[:8])
399+
if crc != isal_zlib.crc32(decompressed):
397400
raise BadGzipFile("CRC check failed")
398-
if length != len(block):
401+
if length != (len(decompressed) & 0xffffffff):
399402
raise BadGzipFile("Incorrect length of data produced")
400-
all_blocks.append(block)
401-
# Remove all padding null bytes and start next block.
403+
decompressed_members.append(decompressed)
402404
data = do.unused_data[8:].lstrip(b"\x00")
403-
return b"".join(all_blocks)
404405

405406

406407
def _argument_parser():

tests/test_igzip.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import os
2828
import re
2929
import shutil
30+
import struct
3031
import sys
3132
import tempfile
3233
import zlib
@@ -343,19 +344,23 @@ def test_header_too_short():
343344

344345

345346
def test_header_corrupt():
346-
header = b"\x1f\x8b\x08\x02\x00\x00\x00\x00\x00\xff"
347+
header = (b"\x1f\x8b\x08\x1f\x00\x00\x00\x00\x00\xff" # All flags set
348+
b"\x05\x00" # Xlen = 5
349+
b"extra"
350+
b"name\x00"
351+
b"comment\x00")
347352
# Create corrupt checksum by using wrong seed.
348353
crc = zlib.crc32(header, 50) & 0xFFFF
349354
true_crc = zlib.crc32(header) & 0xFFFF
350-
header += crc.to_bytes(2, "little")
355+
header += struct.pack("<H", crc)
351356

352357
data = isal_zlib.compress(b"", wbits=-15)
353358
trailer = b"\x00" * 8
354359
compressed = header + data + trailer
355360
with pytest.raises(igzip.BadGzipFile) as error:
356361
igzip.decompress(compressed)
357-
error.match(f"Corrupted header. "
358-
f"Checksums do not match: {true_crc} != {crc}")
362+
error.match(f"Corrupted gzip header. "
363+
f"Checksums do not match: {true_crc:04x} != {crc:04x}")
359364

360365

361366
TRUNCATED_HEADERS = [

0 commit comments

Comments
 (0)