Skip to content

Commit c238e24

Browse files
committed
Throw an overflow error on big header sizes
1 parent 7dad3d0 commit c238e24

File tree

3 files changed

+40
-1
lines changed

3 files changed

+40
-1
lines changed

src/isal/igzip.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -264,7 +264,7 @@ def decompress(data):
264264
gzip member is guaranteed to be present.
265265
"""
266266
fp = io.BytesIO(data)
267-
reader = _GzipReader(fp, len(data))
267+
reader = _GzipReader(fp, max(len(data), 16))
268268
return reader.readall()
269269

270270

src/isal/isal_zlibmodule.c

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1282,6 +1282,16 @@ GzipReader__new__(PyTypeObject *type, PyObject *args, PyObject *kwargs)
12821282
args, kwargs, format, keywords, &fp, &buffer_size)) {
12831283
return NULL;
12841284
}
1285+
if (buffer_size < 16) {
1286+
// Necessary to distinguish between truncated headers and headers
1287+
// which are too big. A header is at least 10 bytes, but may contain
1288+
// more depending on flags.
1289+
PyErr_Format(
1290+
PyExc_ValueError,
1291+
"buffersize must be at least 16, got %zd", buffer_size
1292+
);
1293+
return NULL;
1294+
}
12851295
GzipReader *self = PyObject_New(GzipReader, type);
12861296
self->buffer_size = buffer_size;
12871297
self->input_buffer = PyMem_Malloc(self->buffer_size);
@@ -1323,6 +1333,15 @@ static inline ssize_t GzipReader_read_from_file(GzipReader *self)
13231333
current_pos = input_buffer;
13241334
buffer_end = input_buffer + remaining;
13251335
size_t read_in_size = self->buffer_size - remaining;
1336+
if (read_in_size == 0) {
1337+
// The buffer is already full of data but the current position could not
1338+
// progress. This happens when the header is too large.
1339+
PyErr_Format(
1340+
PyExc_OverflowError,
1341+
"header does not fit into buffer of size %zu",
1342+
self->buffer_size);
1343+
return -1;
1344+
}
13261345
PyObject *bufview = PyMemoryView_FromMemory((char *)buffer_end, read_in_size, PyBUF_WRITE);
13271346
if (bufview == NULL) {
13281347
return -1;

tests/test_igzip.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -431,6 +431,26 @@ def test_truncated_header(trunc):
431431
with pytest.raises(EOFError):
432432
igzip.decompress(trunc)
433433

434+
def test_very_long_header_in_data():
435+
# header with a very long filename.
436+
header = (b"\x1f\x8b\x08\x08\x00\x00\x00\x00\x00\xff" + 256 * 1024 * b"A" +
437+
b"\x00")
438+
compressed = header + isal_zlib.compress(b"", 3, -15) + 8 * b"\00"
439+
assert igzip.decompress(compressed) == b""
440+
441+
442+
def test_very_long_header_in_file():
443+
# header with a very long filename.
444+
header = (b"\x1f\x8b\x08\x08\x00\x00\x00\x00\x00\xff" +
445+
igzip.READ_BUFFER_SIZE * 2 * b"A" +
446+
b"\x00")
447+
compressed = header + isal_zlib.compress(b"", 3, -15) + 8 * b"\00"
448+
f = io.BytesIO(compressed)
449+
with pytest.raises(OverflowError) as error:
450+
with igzip.open(f, "rb") as gzip_file:
451+
gzip_file.read()
452+
error.match(f"header does not fit into buffer of size {igzip.READ_BUFFER_SIZE}")
453+
434454

435455
def test_concatenated_gzip():
436456
concat = Path(__file__).parent / "data" / "concatenated.fastq.gz"

0 commit comments

Comments
 (0)