Skip to content

Commit 673c6d3

Browse files
committed
Bring python-isal up to date with python3.11 changes
1 parent 9fd81d7 commit 673c6d3

File tree

2 files changed

+26
-22
lines changed

2 files changed

+26
-22
lines changed

src/isal/igzip.py

Lines changed: 17 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828
import struct
2929
import sys
3030
import time
31-
from typing import List, Optional, SupportsInt
31+
from typing import Optional, SupportsInt
3232
import _compression # noqa: I201 # Not third-party
3333

3434
from . import igzip_lib, isal_zlib
@@ -49,7 +49,7 @@
4949
try:
5050
BadGzipFile = gzip.BadGzipFile # type: ignore
5151
except AttributeError: # Versions lower than 3.8 do not have BadGzipFile
52-
BadGzipFile = OSError
52+
BadGzipFile = OSError # type: ignore
5353

5454

5555
# The open method was copied from the CPython source with minor adjustments.
@@ -353,7 +353,7 @@ def _gzip_header_end(data: bytes) -> int:
353353
if flags & FEXTRA:
354354
if len(data) < pos + 2:
355355
raise eof_error
356-
xlen = int.from_bytes(data[pos: pos + 2], "little", signed=False)
356+
xlen, = struct.unpack("<H", data[pos: pos+2])
357357
pos += 2 + xlen
358358
if flags & FNAME:
359359
pos = data.find(b"\x00", pos) + 1
@@ -367,12 +367,12 @@ def _gzip_header_end(data: bytes) -> int:
367367
if flags & FHCRC:
368368
if len(data) < pos + 2:
369369
raise eof_error
370-
header_crc = int.from_bytes(data[pos: pos + 2], "little", signed=False)
370+
header_crc, = struct.unpack("<H", data[pos: pos+2])
371371
# CRC is stored as a 16-bit integer by taking last bits of crc32.
372372
crc = isal_zlib.crc32(data[:pos]) & 0xFFFF
373373
if header_crc != crc:
374-
raise BadGzipFile(f"Corrupted header. Checksums do not "
375-
f"match: {crc} != {header_crc}")
374+
raise BadGzipFile(f"Corrupted gzip header. Checksums do not "
375+
f"match: {crc:04x} != {header_crc:04x}")
376376
pos += 2
377377
return pos
378378

@@ -381,26 +381,25 @@ def decompress(data):
381381
"""Decompress a gzip compressed string in one shot.
382382
Return the decompressed string.
383383
"""
384-
all_blocks: List[bytes] = []
384+
decompressed_members = []
385385
while True:
386-
if data == b"":
387-
break
386+
if not data: # Empty data returns empty bytestring
387+
return b"".join(decompressed_members)
388388
header_end = _gzip_header_end(data)
389-
do = isal_zlib.decompressobj(-15)
390-
block = do.decompress(data[header_end:]) + do.flush()
389+
# Use a zlib raw deflate compressor
390+
do = isal_zlib.decompressobj(wbits=-isal_zlib.MAX_WBITS)
391+
# Read all the data except the header
392+
decompressed = do.decompress(data[header_end:])
391393
if not do.eof or len(do.unused_data) < 8:
392394
raise EOFError("Compressed file ended before the end-of-stream "
393395
"marker was reached")
394-
checksum, length = struct.unpack("<II", do.unused_data[:8])
395-
crc = isal_zlib.crc32(block)
396-
if crc != checksum:
396+
crc, length = struct.unpack("<II", do.unused_data[:8])
397+
if crc != isal_zlib.crc32(decompressed):
397398
raise BadGzipFile("CRC check failed")
398-
if length != len(block):
399+
if length != (len(decompressed) & 0xffffffff):
399400
raise BadGzipFile("Incorrect length of data produced")
400-
all_blocks.append(block)
401-
# Remove all padding null bytes and start next block.
401+
decompressed_members.append(decompressed)
402402
data = do.unused_data[8:].lstrip(b"\x00")
403-
return b"".join(all_blocks)
404403

405404

406405
def _argument_parser():

tests/test_igzip.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
import os
2828
import re
2929
import shutil
30+
import struct
3031
import sys
3132
import tempfile
3233
import zlib
@@ -343,19 +344,23 @@ def test_header_too_short():
343344

344345

345346
def test_header_corrupt():
346-
header = b"\x1f\x8b\x08\x02\x00\x00\x00\x00\x00\xff"
347+
header = (b"\x1f\x8b\x08\x1f\x00\x00\x00\x00\x00\xff" # All flags set
348+
b"\x05\x00" # Xlen = 5
349+
b"extra"
350+
b"name\x00"
351+
b"comment\x00")
347352
# Create corrupt checksum by using wrong seed.
348353
crc = zlib.crc32(header, 50) & 0xFFFF
349354
true_crc = zlib.crc32(header) & 0xFFFF
350-
header += crc.to_bytes(2, "little")
355+
header += struct.pack("<H", crc)
351356

352357
data = isal_zlib.compress(b"", wbits=-15)
353358
trailer = b"\x00" * 8
354359
compressed = header + data + trailer
355360
with pytest.raises(igzip.BadGzipFile) as error:
356361
igzip.decompress(compressed)
357-
error.match(f"Corrupted header. "
358-
f"Checksums do not match: {true_crc} != {crc}")
362+
error.match(f"Corrupted gzip header. "
363+
f"Checksums do not match: {true_crc:04x} != {crc:04x}")
359364

360365

361366
TRUNCATED_HEADERS = [

0 commit comments

Comments
 (0)