Commit f686045

Upgrade libbrotli to 1.2.0
1 parent: 8e27c66

File tree: 5 files changed, +153 −8 lines

HISTORY.rst

Lines changed: 10 additions & 0 deletions

@@ -1,6 +1,16 @@
 Changelog
 =========
 
+1.2.0.0 (TBD)
+--------------------
+
+- Upgraded libbrotli to v1.2.0.
+- Added ``output_buffer_limit`` parameter to ``Decompressor.decompress()`` and
+  ``Decompressor.process()`` methods to allow mitigation of unexpectedly large
+  output. This addresses potential security concerns where maliciously crafted
+  compressed data could result in excessive memory usage during decompression.
+
+
 1.1.0.0 (2023-09-14)
 --------------------
 
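
A minimal usage sketch of the new parameter, assuming a build from this commit; the payload and the 64 KiB limit below are illustrative choices, not values taken from the changelog:

import brotlicffi

# Illustrative payload; any Brotli stream works here.
payload = b"hello brotli " * 10_000
compressed = brotlicffi.compress(payload)

d = brotlicffi.Decompressor()
# Cap each call's output at 64 KiB so hostile input can't balloon memory.
output = [d.decompress(compressed, output_buffer_limit=64 * 1024)]

# Drain buffered output with empty input until the decoder is ready
# for more compressed data (or the stream is complete).
while not d.can_accept_more_data() and not d.is_finished():
    output.append(d.decompress(b'', output_buffer_limit=64 * 1024))

assert b''.join(output) == payload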

libbrotli

Submodule libbrotli updated 232 files

src/brotlicffi/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -5,4 +5,4 @@
     Compressor, MODE_GENERIC, MODE_TEXT, MODE_FONT, error, Error
 )
 
-__version__ = "1.1.0.0"
+__version__ = "1.2.0.0"

src/brotlicffi/_api.py

Lines changed: 103 additions & 6 deletions

@@ -348,17 +348,23 @@ class Decompressor(object):
     .. versionchanged:: 0.5.0
         Added ``dictionary`` parameter.
 
+    .. versionchanged:: 1.2.0
+        Added ``can_accept_more_data()`` method and optional
+        ``output_buffer_limit`` parameter to ``process()``/``decompress()``.
+
     :param dictionary: A pre-set dictionary for LZ77. Please use this with
         caution: if a dictionary is used for compression, the same dictionary
         **must** be used for decompression!
     :type dictionary: ``bytes``
     """
     _dictionary = None
     _dictionary_size = None
+    _unconsumed_data = None
 
     def __init__(self, dictionary=b''):
         dec = lib.BrotliDecoderCreateInstance(ffi.NULL, ffi.NULL, ffi.NULL)
         self._decoder = ffi.gc(dec, lib.BrotliDecoderDestroyInstance)
+        self._unconsumed_data = b''
 
         if dictionary:
             self._dictionary = ffi.new("uint8_t []", dictionary)

@@ -369,23 +375,74 @@ def __init__(self, dictionary=b''):
             self._dictionary
         )
 
-    def decompress(self, data):
+    @staticmethod
+    def _calculate_buffer_size(
+        input_data_len, output_buffer_limit, total_output_size, chunks_num
+    ):
+        if output_buffer_limit is not None:
+            remaining_space = output_buffer_limit - total_output_size
+            if remaining_space <= 0:
+                return None
+            return remaining_space
+        # When `decompress(b'')` is called without `output_buffer_limit`.
+        elif input_data_len == 0:
+            # libbrotli would use 32 KB as a starting buffer size and double it
+            # each time, capped at 16 MB.
+            # https://github.com/google/brotli/blob/028fb5a23661f123017c060daa546b55cf4bde29/python/_brotli.c#L291-L292
+            log_size = chunks_num + 15
+            return 1 << min(log_size, 24)
+        else:
+            # Allocate a buffer that's hopefully overlarge, but if it's not we
+            # don't mind: we'll spin around again.
+            return 5 * input_data_len
+
+    def decompress(self, data, output_buffer_limit=None):
         """
         Decompress part of a complete Brotli-compressed string.
 
+        .. versionchanged:: 1.2.0
+            Added ``output_buffer_limit`` parameter.
+
         :param data: A bytestring containing Brotli-compressed data.
+        :param output_buffer_limit: Optional maximum size for the output
+            buffer. If set, the output buffer will not grow once its size
+            equals or exceeds this value. If the limit is reached, further
+            calls to ``process()`` (potentially with empty input) will
+            continue to yield more data; such calls must be made with empty
+            input until ``can_accept_more_data()`` returns ``True``.
+        :type output_buffer_limit: ``int`` or ``None``
         :returns: A bytestring containing the decompressed data.
         """
+        if self._unconsumed_data and data:
+            raise error(
+                "brotli: decoder process called with data when "
+                "'can_accept_more_data()' is False"
+            )
+
+        # Use unconsumed data if available, use new data otherwise.
+        if self._unconsumed_data:
+            input_data = self._unconsumed_data
+            self._unconsumed_data = b''
+        else:
+            input_data = data
+
         chunks = []
+        chunks_len = 0
 
-        available_in = ffi.new("size_t *", len(data))
-        in_buffer = ffi.new("uint8_t[]", data)
+        available_in = ffi.new("size_t *", len(input_data))
+        in_buffer = ffi.new("uint8_t[]", input_data)
         next_in = ffi.new("uint8_t **", in_buffer)
 
         while True:
-            # Allocate a buffer that's hopefully overlarge, but if it's not we
-            # don't mind: we'll spin around again.
-            buffer_size = 5 * len(data)
+            buffer_size = self._calculate_buffer_size(
+                input_data_len=len(input_data),
+                output_buffer_limit=output_buffer_limit,
+                total_output_size=chunks_len,
+                chunks_num=len(chunks),
+            )
+            if buffer_size is None:
+                break
+
             available_out = ffi.new("size_t *", buffer_size)
             out_buffer = ffi.new("uint8_t[]", buffer_size)
             next_out = ffi.new("uint8_t **", out_buffer)

@@ -408,6 +465,19 @@ def decompress(self, data):
             # Next, copy the result out.
             chunk = ffi.buffer(out_buffer, buffer_size - available_out[0])[:]
             chunks.append(chunk)
+            chunks_len += len(chunk)
+
+            # Save any unconsumed input for the next call.
+            if available_in[0] > 0:
+                remaining_input = ffi.buffer(next_in[0], available_in[0])[:]
+                self._unconsumed_data = remaining_input
+
+            # Check if we've reached the output limit.
+            if (
+                output_buffer_limit is not None
+                and chunks_len >= output_buffer_limit
+            ):
+                break
 
             if rc == lib.BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
                 assert available_in[0] == 0

@@ -459,3 +529,30 @@ def is_finished(self):
             is complete, ``False`` otherwise
         """
         return lib.BrotliDecoderIsFinished(self._decoder) == lib.BROTLI_TRUE
+
+    def can_accept_more_data(self):
+        """
+        Checks if the decompressor can accept more compressed data.
+
+        If the ``output_buffer_limit`` parameter was used with
+        ``decompress()`` or ``process()``, this method should be checked to
+        determine if the decompressor is ready to accept new input. When the
+        output buffer limit is reached, the decompressor may still have
+        unconsumed input data or internally buffered output, and calling
+        ``decompress(b'')`` repeatedly will continue producing output until
+        this method returns ``True``.
+
+        .. versionadded:: 1.2.0
+
+        :returns: ``True`` if the decompressor is ready to accept more
+            compressed data via ``decompress()`` or ``process()``, ``False``
+            if the decompressor needs to output some data via
+            ``decompress(b'')``/``process(b'')`` before being provided any
+            more compressed data.
+        :rtype: ``bool``
+        """
+        if len(self._unconsumed_data) > 0:
+            return False
+        if lib.BrotliDecoderHasMoreOutput(self._decoder) == lib.BROTLI_TRUE:
+            return False
+        return True
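
To make the drain-mode branch of ``_calculate_buffer_size()`` concrete: when ``decompress(b'')`` is called without a limit, allocation starts at 32 KB (``1 << 15``) and doubles with each buffered chunk, capped at 16 MB (``1 << 24``), mirroring the upstream C bindings. A standalone sketch of that arithmetic (the helper name ``drain_buffer_size`` is made up for illustration):

# Mirrors the `input_data_len == 0` branch of _calculate_buffer_size():
# 1 << min(chunks_num + 15, 24), i.e. 32 KB doubling up to a 16 MB cap.
def drain_buffer_size(chunks_num):
    return 1 << min(chunks_num + 15, 24)

assert drain_buffer_size(0) == 32 * 1024          # first chunk: 32 KB
assert drain_buffer_size(1) == 64 * 1024          # doubles per chunk
assert drain_buffer_size(9) == 16 * 1024 * 1024   # reaches the 16 MB cap
assert drain_buffer_size(20) == 16 * 1024 * 1024  # stays capped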

test/test_simple_decompression.py

Lines changed: 38 additions & 0 deletions

@@ -38,6 +38,44 @@ def test_decompressobj(simple_compressed_file):
     assert data == uncompressed_data
 
 
+# `more_data_limit` allows testing `decompress(b'')` with and without a limit.
+@pytest.mark.parametrize('more_data_limit', [100, None])
+def test_decompressobj_with_output_buffer_limit(
+    simple_compressed_file, more_data_limit
+):
+    """
+    Test decompression with `output_buffer_limit` set.
+    """
+    with open(simple_compressed_file[0], 'rb') as f:
+        uncompressed_data = f.read()
+
+    with open(simple_compressed_file[1], 'rb') as f:
+        compressed_data = f.read()
+
+    o = brotlicffi.Decompressor()
+    assert o.can_accept_more_data()
+    small_limit = 100
+    result = o.decompress(compressed_data, output_buffer_limit=small_limit)
+    assert len(result) <= small_limit
+
+    if not o.is_finished():
+        assert not o.can_accept_more_data()
+
+    # Continue decompressing with empty input.
+    all_output = [result]
+    while not o.can_accept_more_data() and not o.is_finished():
+        more_output = o.decompress(
+            b'', output_buffer_limit=more_data_limit
+        )
+        if more_data_limit is not None:
+            assert len(more_output) <= more_data_limit
+        all_output.append(more_output)
+    assert o.can_accept_more_data() or o.is_finished()
+
+    final_result = b''.join(all_output)
+    assert final_result == uncompressed_data
+
+
 def test_drip_feed(simple_compressed_file):
     """
     Sending in the data one byte at a time still works.
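
One path the test above does not exercise is the guard in ``decompress()`` that raises when new data arrives while earlier input is still unconsumed. A hedged sketch of such a test, assuming ``os.urandom`` input is incompressible enough that a 1-byte output limit actually leaves bytes unconsumed:

import os

import pytest
import brotlicffi


def test_decompress_rejects_new_data_when_not_ready():
    # Incompressible input keeps the compressed stream large, so a tiny
    # output limit should leave input unconsumed inside the decompressor.
    compressed = brotlicffi.compress(os.urandom(100_000))

    o = brotlicffi.Decompressor()
    o.decompress(compressed, output_buffer_limit=1)
    assert not o.can_accept_more_data()

    # Feeding more data before draining raises brotlicffi.error.
    with pytest.raises(brotlicffi.error):
        o.decompress(b'more data')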
