@@ -348,17 +348,23 @@ class Decompressor(object):
348348 .. versionchanged:: 0.5.0
349349 Added ``dictionary`` parameter.
350350
351+ .. versionchanged:: 1.2.0
352+ Added ``can_accept_more_data()`` method and optional
353+ ``output_buffer_limit`` parameter to ``process()``/``decompress()``.
354+
351355 :param dictionary: A pre-set dictionary for LZ77. Please use this with
352356 caution: if a dictionary is used for compression, the same dictionary
353357 **must** be used for decompression!
354358 :type dictionary: ``bytes``
355359 """
356360 _dictionary = None
357361 _dictionary_size = None
362+ _unconsumed_data = None
358363
359364 def __init__ (self , dictionary = b'' ):
360365 dec = lib .BrotliDecoderCreateInstance (ffi .NULL , ffi .NULL , ffi .NULL )
361366 self ._decoder = ffi .gc (dec , lib .BrotliDecoderDestroyInstance )
367+ self ._unconsumed_data = b''
362368
363369 if dictionary :
364370 self ._dictionary = ffi .new ("uint8_t []" , dictionary )
@@ -369,23 +375,74 @@ def __init__(self, dictionary=b''):
369375 self ._dictionary
370376 )
371377
372- def decompress (self , data ):
@staticmethod
def _calculate_buffer_size(
    input_data_len, output_buffer_limit, total_output_size, chunks_num
):
    """
    Choose the size of the next output buffer for a decompression pass.

    :param input_data_len: Length, in bytes, of the input being processed.
    :param output_buffer_limit: Upper bound on the total output size, or
        ``None`` when no bound was requested.
    :param total_output_size: Number of output bytes collected so far.
    :param chunks_num: Number of output chunks collected so far.
    :returns: The buffer size in bytes, or ``None`` when a limit is set
        and no room is left under it.
    """
    if output_buffer_limit is None:
        if input_data_len != 0:
            # Deliberately generous guess; if it turns out to be too
            # small the caller simply asks for another buffer.
            return 5 * input_data_len

        # Empty input and no limit (i.e. `decompress(b'')`): mirror
        # libbrotli, which starts at 32 KB and doubles the buffer for
        # every chunk produced, capped at 16 MB.
        # https://github.com/google/brotli/blob/028fb5a23661f123017c060daa546b55cf4bde29/python/_brotli.c#L291-L292
        shift = min(chunks_num + 15, 24)
        return 1 << shift

    # A limit is in force: hand out exactly what is left under it.
    room_left = output_buffer_limit - total_output_size
    return None if room_left <= 0 else room_left
398+
399+ def decompress (self , data , output_buffer_limit = None ):
373400 """
374401 Decompress part of a complete Brotli-compressed string.
375402
403+ .. versionchanged:: 1.2.0
404+ Added ``output_buffer_limit`` parameter.
405+
376406 :param data: A bytestring containing Brotli-compressed data.
407+ :param output_buffer_limit: Optional maximum size for the output
408+ buffer. If set, the output buffer will not grow once its size
409+ equals or exceeds this value. If the limit is reached, further
410+ calls to process (potentially with empty input) will continue to
411+ yield more data. Following process() calls must only be called
412+ with empty input until can_accept_more_data() returns True.
413+ :type output_buffer_limit: ``int`` or ``None``
377414 :returns: A bytestring containing the decompressed data.
378415 """
416+ if self ._unconsumed_data and data :
417+ raise error (
418+ "brotli: decoder process called with data when "
419+ "'can_accept_more_data()' is False"
420+ )
421+
422+ # Use unconsumed data if available, use new data otherwise.
423+ if self ._unconsumed_data :
424+ input_data = self ._unconsumed_data
425+ self ._unconsumed_data = b''
426+ else :
427+ input_data = data
428+
379429 chunks = []
430+ chunks_len = 0
380431
381- available_in = ffi .new ("size_t *" , len (data ))
382- in_buffer = ffi .new ("uint8_t[]" , data )
432+ available_in = ffi .new ("size_t *" , len (input_data ))
433+ in_buffer = ffi .new ("uint8_t[]" , input_data )
383434 next_in = ffi .new ("uint8_t **" , in_buffer )
384435
385436 while True :
386- # Allocate a buffer that's hopefully overlarge, but if it's not we
387- # don't mind: we'll spin around again.
388- buffer_size = 5 * len (data )
437+ buffer_size = self ._calculate_buffer_size (
438+ input_data_len = len (input_data ),
439+ output_buffer_limit = output_buffer_limit ,
440+ total_output_size = chunks_len ,
441+ chunks_num = len (chunks ),
442+ )
443+ if buffer_size is None :
444+ break
445+
389446 available_out = ffi .new ("size_t *" , buffer_size )
390447 out_buffer = ffi .new ("uint8_t[]" , buffer_size )
391448 next_out = ffi .new ("uint8_t **" , out_buffer )
@@ -408,6 +465,19 @@ def decompress(self, data):
408465 # Next, copy the result out.
409466 chunk = ffi .buffer (out_buffer , buffer_size - available_out [0 ])[:]
410467 chunks .append (chunk )
468+ chunks_len += len (chunk )
469+
470+ # Save any unconsumed input for the next call.
471+ if available_in [0 ] > 0 :
472+ remaining_input = ffi .buffer (next_in [0 ], available_in [0 ])[:]
473+ self ._unconsumed_data = remaining_input
474+
475+ # Check if we've reached the output limit.
476+ if (
477+ output_buffer_limit is not None
478+ and chunks_len >= output_buffer_limit
479+ ):
480+ break
411481
412482 if rc == lib .BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT :
413483 assert available_in [0 ] == 0
@@ -459,3 +529,30 @@ def is_finished(self):
459529 is complete, ``False`` otherwise
460530 """
461531 return lib .BrotliDecoderIsFinished (self ._decoder ) == lib .BROTLI_TRUE
532+
def can_accept_more_data(self):
    """
    Report whether new compressed input may be passed to the decompressor.

    This is relevant when ``decompress()`` or ``process()`` was called
    with ``output_buffer_limit``: once that limit has been hit, input
    held back from an earlier call or output still pending inside the
    decoder has to be drained with ``decompress(b'')`` before new input
    is supplied. Keep calling ``decompress(b'')`` until this method
    returns ``True``.

    .. versionadded:: 1.2.0

    :returns: ``False`` while unconsumed input from a previous call is
        stored on the instance or the decoder reports that it has more
        output; ``True`` otherwise.
    :rtype: ``bool``
    """
    # Input saved by an earlier call must be processed first. Checking
    # it before the decoder keeps the library call off this path.
    input_pending = len(self._unconsumed_data) > 0
    if input_pending:
        return False

    # Otherwise the answer depends only on whether the decoder still
    # holds output that has not been handed out yet.
    output_pending = (
        lib.BrotliDecoderHasMoreOutput(self._decoder) == lib.BROTLI_TRUE
    )
    return not output_pending
0 commit comments