Upgrade libbrotli to 1.2.0

illia-v · illia-v · commit f6860450452a · 2025-11-01T21:59:22.000Z
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -1,6 +1,16 @@
 Changelog
 =========
 
+1.2.0.0 (TBD)
+--------------------
+
+- Upgraded libbrotli to v1.2.0.
+- Added ``output_buffer_limit`` parameter to ``Decompressor.decompress()`` and
+  ``Decompressor.process()``, plus a ``Decompressor.can_accept_more_data()``
+  method, to allow mitigating unexpectedly large output. This addresses the
+  security risk of malicious input exhausting memory during decompression.
+
+
 1.1.0.0 (2023-09-14)
 --------------------
 
diff --git a/libbrotli b/libbrotli
@@ -1 +1 @@
-Subproject commit ed738e842d2fbdf2d6459e39267a633c4a9b2f5d
+Subproject commit 028fb5a23661f123017c060daa546b55cf4bde29
diff --git a/src/brotlicffi/__init__.py b/src/brotlicffi/__init__.py
@@ -5,4 +5,4 @@
     Compressor, MODE_GENERIC, MODE_TEXT, MODE_FONT, error, Error
 )
 
-__version__ = "1.1.0.0"
+__version__ = "1.2.0.0"
diff --git a/src/brotlicffi/_api.py b/src/brotlicffi/_api.py
@@ -348,17 +348,23 @@ class Decompressor(object):
     .. versionchanged:: 0.5.0
        Added ``dictionary`` parameter.
 
+    .. versionchanged:: 1.2.0
+       Added ``can_accept_more_data()`` method and optional
+       ``output_buffer_limit`` parameter to ``process()``/``decompress()``.
+
     :param dictionary: A pre-set dictionary for LZ77. Please use this with
         caution: if a dictionary is used for compression, the same dictionary
         **must** be used for decompression!
     :type dictionary: ``bytes``
     """
     _dictionary = None
     _dictionary_size = None
+    _unconsumed_data = None
 
     def __init__(self, dictionary=b''):
         dec = lib.BrotliDecoderCreateInstance(ffi.NULL, ffi.NULL, ffi.NULL)
         self._decoder = ffi.gc(dec, lib.BrotliDecoderDestroyInstance)
+        self._unconsumed_data = b''
 
         if dictionary:
             self._dictionary = ffi.new("uint8_t []", dictionary)
@@ -369,23 +375,74 @@ def __init__(self, dictionary=b''):
                 self._dictionary
             )
 
-    def decompress(self, data):
+    @staticmethod
+    def _calculate_buffer_size(
+        input_data_len, output_buffer_limit, total_output_size, chunks_num
+    ):
+        if output_buffer_limit is not None:
+            remaining_space = output_buffer_limit - total_output_size
+            if remaining_space <= 0:
+                return None
+            return remaining_space
+        # When `decompress(b'')` is called without `output_buffer_limit`.
+        elif input_data_len == 0:
+            # libbrotli would use 32 KB as a starting buffer size and double it
+            # each time, capped at 16 MB.
+            # https://github.com/google/brotli/blob/028fb5a23661f123017c060daa546b55cf4bde29/python/_brotli.c#L291-L292
+            log_size = chunks_num + 15
+            return 1 << min(log_size, 24)
+        else:
+            # Allocate a buffer that's hopefully overlarge, but if it's not we
+            # don't mind: we'll spin around again.
+            return 5 * input_data_len
+
+    def decompress(self, data, output_buffer_limit=None):
         """
         Decompress part of a complete Brotli-compressed string.
 
+        .. versionchanged:: 1.2.0
+           Added ``output_buffer_limit`` parameter.
+
         :param data: A bytestring containing Brotli-compressed data.
+        :param output_buffer_limit: Optional maximum size for the output
+            buffer. If set, the output buffer will not grow once its size
+            equals or exceeds this value. If the limit is reached, further
+            calls to ``decompress()`` (potentially with empty input) will
+            continue to yield more data. Subsequent calls must pass only empty
+            input until ``can_accept_more_data()`` returns ``True``.
+        :type output_buffer_limit: ``int`` or ``None``
         :returns: A bytestring containing the decompressed data.
         """
+        if self._unconsumed_data and data:
+            raise error(
+                "brotli: decoder process called with data when "
+                "'can_accept_more_data()' is False"
+            )
+
+        # Use unconsumed data if available, use new data otherwise.
+        if self._unconsumed_data:
+            input_data = self._unconsumed_data
+            self._unconsumed_data = b''
+        else:
+            input_data = data
+
         chunks = []
+        chunks_len = 0
 
-        available_in = ffi.new("size_t *", len(data))
-        in_buffer = ffi.new("uint8_t[]", data)
+        available_in = ffi.new("size_t *", len(input_data))
+        in_buffer = ffi.new("uint8_t[]", input_data)
         next_in = ffi.new("uint8_t **", in_buffer)
 
         while True:
-            # Allocate a buffer that's hopefully overlarge, but if it's not we
-            # don't mind: we'll spin around again.
-            buffer_size = 5 * len(data)
+            buffer_size = self._calculate_buffer_size(
+                input_data_len=len(input_data),
+                output_buffer_limit=output_buffer_limit,
+                total_output_size=chunks_len,
+                chunks_num=len(chunks),
+            )
+            if buffer_size is None:
+                break
+
             available_out = ffi.new("size_t *", buffer_size)
             out_buffer = ffi.new("uint8_t[]", buffer_size)
             next_out = ffi.new("uint8_t **", out_buffer)
@@ -408,6 +465,19 @@ def decompress(self, data):
             # Next, copy the result out.
             chunk = ffi.buffer(out_buffer, buffer_size - available_out[0])[:]
             chunks.append(chunk)
+            chunks_len += len(chunk)
+
+            # Save any unconsumed input for the next call.
+            if available_in[0] > 0:
+                remaining_input = ffi.buffer(next_in[0], available_in[0])[:]
+                self._unconsumed_data = remaining_input
+
+            # Check if we've reached the output limit.
+            if (
+                output_buffer_limit is not None
+                and chunks_len >= output_buffer_limit
+            ):
+                break
 
             if rc == lib.BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
                 assert available_in[0] == 0
@@ -459,3 +529,30 @@ def is_finished(self):
         is complete, ``False`` otherwise
         """
         return lib.BrotliDecoderIsFinished(self._decoder) == lib.BROTLI_TRUE
+
+    def can_accept_more_data(self):
+        """
+        Checks if the decompressor can accept more compressed data.
+
+        If the ``output_buffer_limit`` parameter was used with
+        ``decompress()`` or ``process()``, this method should be checked to
+        determine if the decompressor is ready to accept new input. When the
+        output buffer limit is reached, the decompressor may still have
+        unconsumed input data or internal buffered output, and calling
+        ``decompress(b'')`` repeatedly will continue producing output until
+        this method returns ``True``.
+
+        .. versionadded:: 1.2.0
+
+        :returns: ``True`` if the decompressor is ready to accept more
+            compressed data via ``decompress()`` or ``process()``, ``False``
+            if the decompressor needs to output some data via
+            ``decompress(b'')``/``process(b'')`` before being provided any
+            more compressed data.
+        :rtype: ``bool``
+        """
+        if len(self._unconsumed_data) > 0:
+            return False
+        if lib.BrotliDecoderHasMoreOutput(self._decoder) == lib.BROTLI_TRUE:
+            return False
+        return True
diff --git a/test/test_simple_decompression.py b/test/test_simple_decompression.py
@@ -38,6 +38,44 @@ def test_decompressobj(simple_compressed_file):
     assert data == uncompressed_data
 
 
+# `more_data_limit` allows testing `decompress(b'')` with and without a limit.
+@pytest.mark.parametrize('more_data_limit', [100, None])
+def test_decompressobj_with_output_buffer_limit(
+    simple_compressed_file, more_data_limit
+):
+    """
+    Test decompression with `output_buffer_limit` set.
+    """
+    with open(simple_compressed_file[0], 'rb') as f:
+        uncompressed_data = f.read()
+
+    with open(simple_compressed_file[1], 'rb') as f:
+        compressed_data = f.read()
+
+    o = brotlicffi.Decompressor()
+    assert o.can_accept_more_data()
+    small_limit = 100
+    result = o.decompress(compressed_data, output_buffer_limit=small_limit)
+    assert len(result) <= small_limit
+
+    if not o.is_finished():
+        assert not o.can_accept_more_data()
+
+        # Continue decompressing with empty input.
+        all_output = [result]
+        while not o.can_accept_more_data() and not o.is_finished():
+            more_output = o.decompress(
+                b'', output_buffer_limit=more_data_limit
+            )
+            if more_data_limit is not None:
+                assert len(more_output) <= more_data_limit
+            all_output.append(more_output)
+        assert o.can_accept_more_data() or o.is_finished()
+
+        final_result = b''.join(all_output)
+        assert final_result == uncompressed_data
+
+
 def test_drip_feed(simple_compressed_file):
     """
     Sending in the data one byte at a time still works.

Original file line number	Diff line number	Diff line change
`@@ -5,4 +5,4 @@`
`5`	`5`	`Compressor, MODE_GENERIC, MODE_TEXT, MODE_FONT, error, Error`
`6`	`6`	`)`
`7`	`7`
`8`		`-__version__ = "1.1.0.0"`
	`8`	`+__version__ = "1.2.0.0"`