Skip to content

Commit d1e7458

Browse files
committed
Increase testing and fix functions
1 parent 124092f commit d1e7458

File tree

2 files changed

+75
-7
lines changed

2 files changed

+75
-7
lines changed

src/isal/igzip.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
_COMPRESS_LEVEL_TRADEOFF = isal_zlib.ISAL_DEFAULT_COMPRESSION
3939
_COMPRESS_LEVEL_BEST = isal_zlib.ISAL_BEST_COMPRESSION
4040

41-
FTEXT, FHCRC, FHEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
41+
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
4242

4343
try:
4444
BadGzipFile = gzip.BadGzipFile # type: ignore
@@ -259,32 +259,42 @@ def _gzip_header_end(data: bytes) -> int:
259259
:param data: Compressed data that starts with a gzip header.
260260
:return: The end of the header / start of the raw deflate block.
261261
"""
262+
eof_error = EOFError("Compressed file ended before the end-of-stream "
263+
"marker was reached")
262264
if len(data) < 10:
263-
raise BadGzipFile("Gzip header should be 10 bytes or more")
265+
raise eof_error
264266
# We are not interested in mtime, xfl and os flags.
265267
magic, method, flags = struct.unpack("<HBB", data[:4])
266268
if magic != 0x8b1f:
267269
raise BadGzipFile(f"Not a gzipped file ({repr(data[:2])})")
268270
if method != 8:
269271
raise BadGzipFile("Unknown compression method")
270272
pos = 10
271-
if flags & FHEXTRA:
273+
failure = False
274+
if flags & FEXTRA:
272275
xlen = int.from_bytes(data[pos: pos + 2], "little", signed=False)
273276
pos += 2 + xlen
274277
if flags & FNAME:
275-
fname_end = data.index(b"\x00", pos) + 1
278+
fname_end = data.find(b"\x00", pos) + 1
279+
# fname_end will be -1 + 1 when null byte not found.
280+
if not fname_end:
281+
raise eof_error
276282
pos = fname_end
277283
if flags & FCOMMENT:
278-
fcomment_end = data.index(b"\x00", pos) + 1
284+
fcomment_end = data.find(b"\x00", pos) + 1
285+
if not fcomment_end:
286+
raise eof_error
279287
pos = fcomment_end
280288
if flags & FHCRC:
281-
pos += 2
282289
header_crc = int.from_bytes(data[pos: pos + 2], "little", signed=False)
283290
# CRC is stored as a 16-bit integer by taking last bits of crc32.
284291
crc = isal_zlib.crc32(data[:pos]) & 0xFFFF
285292
if header_crc != crc:
286293
raise BadGzipFile(f"Corrupted header. Checksums do not "
287294
f"match: {crc} != {header_crc}")
295+
pos += 2
296+
if failure or pos > len(data):
297+
raise eof_error
288298
return pos
289299

290300

tests/test_igzip.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,10 @@
3030
import sys
3131
import tempfile
3232
import zlib
33+
from gzip import FCOMMENT, FEXTRA, FHCRC, FNAME, FTEXT # type: ignore
3334
from pathlib import Path
3435

35-
from isal import igzip
36+
from isal import igzip, isal_zlib
3637

3738
import pytest
3839

@@ -214,3 +215,60 @@ def test_decompress_unknown_compression_method():
214215

215216
def test_decompress_empty():
216217
assert igzip.decompress(b"") == b""
218+
219+
220+
def headers():
    """Yield hand-assembled gzip headers covering each optional field.

    Five headers are produced, in this order: one with only FEXTRA set,
    one with only FNAME, one with only FCOMMENT, one with only FHCRC and
    one with FTEXT, FEXTRA, FNAME, FCOMMENT and FHCRC all set. Every
    header consists of the header bytes only; no deflate data follows.

    :return: A generator of ``bytes`` objects, one per header.
    """
    # Magic number followed by the compression method byte.
    fixed_start = b"\x1f\x8b" + b"\x08"
    # mtime (4 bytes), xfl (1 byte) and the OS byte. The OS value is kept
    # anonymous here so the name of the ``os`` module is not shadowed.
    fixed_end = b"\x00\x00\x00\x00" + b"\x00" + b"\xff"

    extra = b"METADATA"
    # The extra field is prefixed by its length as a 2-byte little-endian
    # integer; name and comment are terminated by a null byte.
    extra_field = len(extra).to_bytes(2, "little") + extra
    name_field = b"my_data.tar" + b"\x00"
    comment_field = b"I wrote this header with my bare hands" + b"\x00"

    def build(flags, *fields):
        # Fixed 10-byte part with the given flag byte, then optional fields.
        return (fixed_start + flags.to_bytes(1, "little") + fixed_end +
                b"".join(fields))

    def append_crc(header):
        # The header checksum is the lower 16 bits of the header's crc32.
        crc = zlib.crc32(header) & 0xFFFF
        return header + crc.to_bytes(2, "little")

    yield build(FEXTRA, extra_field)
    yield build(FNAME, name_field)
    yield build(FCOMMENT, comment_field)
    yield append_crc(build(FHCRC))
    all_flags = FTEXT | FEXTRA | FNAME | FCOMMENT | FHCRC
    yield append_crc(
        build(all_flags, extra_field, name_field, comment_field))
249+
250+
251+
@pytest.mark.parametrize("header", list(headers()))
def test_gzip_header_end(header):
    """Check that the reported header end is the header's full length.

    Each parametrized input is a gzip header with nothing after it, so
    the position returned by ``_gzip_header_end`` should be its length.
    """
    expected_end = len(header)
    actual_end = igzip._gzip_header_end(header)
    assert actual_end == expected_end
254+
255+
256+
def test_header_too_short():
    """Check that a header shorter than 10 bytes is reported as truncated.

    ``igzip._gzip_header_end`` raises ``EOFError`` when it is given fewer
    than 10 bytes. The igzip function is called directly so that the test
    exercises igzip rather than the standard-library ``gzip`` module.
    """
    with pytest.raises(EOFError):
        igzip._gzip_header_end(b"00")
259+
260+
261+
def test_header_corrupt():
    """Check that a wrong header checksum is rejected with BadGzipFile.

    A 10-byte header with only the FHCRC flag (0x02) set is followed by a
    deliberately wrong 16-bit checksum, a raw deflate block produced from
    empty input, and an 8-byte all-zero trailer. The error message is
    expected to mention both the computed and the stored checksum.
    """
    bare_header = b"\x1f\x8b\x08\x02\x00\x00\x00\x00\x00\xff"
    expected_crc = zlib.crc32(bare_header) & 0xFFFF
    # Passing a starting value of 50 to crc32 yields a checksum that does
    # not belong to this header.
    stored_crc = zlib.crc32(bare_header, 50) & 0xFFFF
    corrupt_header = bare_header + stored_crc.to_bytes(2, "little")

    deflate_block = isal_zlib.compress(b"", wbits=-15)
    compressed = corrupt_header + deflate_block + b"\x00" * 8
    with pytest.raises(igzip.BadGzipFile) as error:
        igzip.decompress(compressed)
    error.match(f"Corrupted header. "
                f"Checksums do not match: {expected_crc} != {stored_crc}")

0 commit comments

Comments
 (0)