Skip to content

Commit 36551e5

Browse files
authored
Merge pull request #164 from pycompression/fixbuffercrash
Make sure large write calls cannot overflow the buffer in ThreadedGzipWriter
2 parents 71b7d61 + dc7f6dd commit 36551e5

File tree

2 files changed

+48
-15
lines changed

2 files changed

+48
-15
lines changed

src/isal/igzip_threaded.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,10 @@ def open(filename, mode="rb", compresslevel=igzip._COMPRESS_LEVEL_TRADEOFF,
6767
gzip_file = io.BufferedReader(
6868
_ThreadedGzipReader(binary_file, block_size=block_size))
6969
else:
70-
# Deflating random data results in an output a little larger than the
71-
# input. Making the output buffer 10% larger is sufficient overkill.
72-
compress_buffer_size = block_size + max(
73-
block_size // 10, 500)
7470
gzip_file = io.BufferedWriter(
7571
_ThreadedGzipWriter(
7672
fp=binary_file,
77-
buffer_size=compress_buffer_size,
73+
block_size=block_size,
7874
level=compresslevel,
7975
threads=threads
8076
),
@@ -201,15 +197,19 @@ def __init__(self,
201197
level: int = isal_zlib.ISAL_DEFAULT_COMPRESSION,
202198
threads: int = 1,
203199
queue_size: int = 1,
204-
buffer_size: int = 1024 * 1024,
200+
block_size: int = 1024 * 1024,
205201
):
206202
self.lock = threading.Lock()
207203
self.exception: Optional[Exception] = None
208204
self.raw = fp
209205
self.level = level
210206
self.previous_block = b""
207+
# Deflating random data results in an output a little larger than the
208+
# input. Making the output buffer 10% larger is sufficient overkill.
209+
compress_buffer_size = block_size + max(block_size // 10, 500)
210+
self.block_size = block_size
211211
self.compressors: List[isal_zlib._ParallelCompress] = [
212-
isal_zlib._ParallelCompress(buffersize=buffer_size,
212+
isal_zlib._ParallelCompress(buffersize=compress_buffer_size,
213213
level=level) for _ in range(threads)
214214
]
215215
if threads > 1:
@@ -273,8 +273,19 @@ def write(self, b) -> int:
273273
with self.lock:
274274
if self.exception:
275275
raise self.exception
276-
index = self.index
276+
length = b.nbytes if isinstance(b, memoryview) else len(b)
277+
if length > self.block_size:
278+
# write smaller chunks and return the result
279+
memview = memoryview(b)
280+
start = 0
281+
total_written = 0
282+
while start < length:
283+
total_written += self.write(
284+
memview[start:start+self.block_size])
285+
start += self.block_size
286+
return total_written
277287
data = bytes(b)
288+
index = self.index
278289
zdict = memoryview(self.previous_block)[-DEFLATE_WINDOW_SIZE:]
279290
self.previous_block = data
280291
self.index += 1

tests/test_igzip_threaded.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,12 @@ def test_threaded_read():
2828

2929

3030
@pytest.mark.parametrize(["mode", "threads"],
31-
itertools.product(["wb", "wt"], [1, 3]))
31+
itertools.product(["wb", "wt"], [1, 3, -1]))
3232
def test_threaded_write(mode, threads):
3333
with tempfile.NamedTemporaryFile("wb", delete=False) as tmp:
34-
with igzip_threaded.open(tmp, mode, threads=threads) as out_file:
34+
# Use a small block size to simulate many writes.
35+
with igzip_threaded.open(tmp, mode, threads=threads,
36+
block_size=8*1024) as out_file:
3537
gzip_open_mode = "rb" if "b" in mode else "rt"
3638
with gzip.open(TEST_FILE, gzip_open_mode) as in_file:
3739
while True:
@@ -77,13 +79,33 @@ def test_threaded_read_error():
7779

7880
@pytest.mark.timeout(5)
7981
@pytest.mark.parametrize("threads", [1, 3])
80-
def test_threaded_write_error(threads):
81-
# parallel_deflate_and_crc method is called in a worker thread.
82-
with pytest.raises(OverflowError) as error:
82+
def test_threaded_write_oversized_block_no_error(threads):
83+
# Random bytes are incompressible, and therefore are guaranteed to
84+
# trigger a buffer overflow when larger than block size unless handled
85+
# correctly.
86+
data = os.urandom(1024 * 63) # not a multiple of block_size
87+
with tempfile.NamedTemporaryFile(mode="wb", delete=False) as tmp:
8388
with igzip_threaded.open(
84-
io.BytesIO(), "wb", compresslevel=3, threads=threads
89+
tmp, "wb", compresslevel=3, threads=threads,
90+
block_size=8 * 1024
8591
) as writer:
86-
writer.write(os.urandom(1024 * 1024 * 50))
92+
writer.write(data)
93+
with gzip.open(tmp.name, "rb") as gzipped:
94+
decompressed = gzipped.read()
95+
assert data == decompressed
96+
97+
98+
@pytest.mark.timeout(5)
99+
@pytest.mark.parametrize("threads", [1, 3])
100+
def test_threaded_write_error(threads):
101+
f = igzip_threaded._ThreadedGzipWriter(
102+
fp=io.BytesIO(), level=3,
103+
threads=threads, block_size=8 * 1024)
104+
# Bypass the write method which should not allow blocks larger than
105+
# block_size.
106+
f.input_queues[0].put((os.urandom(1024 * 64), b""))
107+
with pytest.raises(OverflowError) as error:
108+
f.close()
87109
error.match("Compressed output exceeds buffer size")
88110

89111

0 commit comments

Comments
 (0)