3333
3434from . import igzip_lib , isal_zlib
3535
36- __all__ = ["IGzipFile" , "open" , "compress" , "decompress" , "BadGzipFile" ]
36+ __all__ = ["IGzipFile" , "open" , "compress" , "decompress" , "BadGzipFile" ,
37+ "READ_BUFFER_SIZE" ]
3738
3839_COMPRESS_LEVEL_FAST = isal_zlib .ISAL_BEST_SPEED
3940_COMPRESS_LEVEL_TRADEOFF = isal_zlib .ISAL_DEFAULT_COMPRESSION
4041_COMPRESS_LEVEL_BEST = isal_zlib .ISAL_BEST_COMPRESSION
4142
43+ #: The amount of data that is read in at once when decompressing a file.
44+ #: Increasing this value may increase performance.
45+ READ_BUFFER_SIZE = io .DEFAULT_BUFFER_SIZE
46+
4247FTEXT , FHCRC , FEXTRA , FNAME , FCOMMENT = 1 , 2 , 4 , 8 , 16
4348
4449try :
@@ -229,8 +234,8 @@ def __init__(self, fp):
229234 # Call the init method of gzip._GzipReader's parent here.
230235 # It is not very invasive and allows us to override _PaddedFile
231236 _compression .DecompressReader .__init__ (
232- self , _PaddedFile (fp ), isal_zlib . decompressobj ,
233- wbits = - isal_zlib . MAX_WBITS )
237+ self , _PaddedFile (fp ), igzip_lib . IgzipDecompressor ,
238+ hist_bits = igzip_lib . MAX_HIST_BITS , flag = igzip_lib . DECOMP_DEFLATE )
234239 # Set flag indicating start of a new member
235240 self ._new_member = True
236241 self ._last_mtime = None
@@ -241,6 +246,57 @@ def _add_read_data(self, data):
241246 self ._crc = isal_zlib .crc32 (data , self ._crc )
242247 self ._stream_size += len (data )
243248
def read(self, size=-1):
    """Return decompressed data, at most ``size`` bytes per call.

    ``size`` is forwarded to the decompressor as its output limit.

    :param size: Maximum number of bytes to return. A negative value
                 delegates to ``readall()``; ``0`` returns ``b""``
                 immediately.
    :return: A non-empty ``bytes`` object, or ``b""`` when ``size`` is 0
             or when no further gzip member header can be read.
    :raises EOFError: If the underlying file returns no more data while
                      the decompressor has produced no output and has not
                      reached the end of the stream.
    """
    if size < 0:
        return self.readall()
    # size=0 is special because decompress(max_length=0) is not supported
    if not size:
        return b""

    # For certain input data, a single call to decompress() may not return
    # any data. In this case, retry until we get some data or reach EOF.
    while True:
        if self._decompressor.eof:
            # Ending case: we've come to the end of a member in the file,
            # so finish up this member, and read a new gzip header.
            # Check the CRC and file size, and set the flag so we read
            # a new member.
            self._read_eof()
            self._new_member = True
            self._decompressor = self._decomp_factory(
                **self._decomp_args)

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.
            self._init_read()
            if not self._read_gzip_header():
                self._size = self._pos
                return b""
            self._new_member = False

        # Only a read performed in THIS iteration can prove that the file
        # is exhausted. Tracking it with a flag (instead of inspecting
        # ``buf`` afterwards) avoids an UnboundLocalError when the first
        # iteration does not need input and yields no output.
        input_exhausted = False
        if self._decompressor.needs_input:
            # Read a chunk of data from the file
            buf = self._fp.read(READ_BUFFER_SIZE)
            input_exhausted = buf == b""
            uncompress = self._decompressor.decompress(buf, size)
        else:
            # The decompressor still holds buffered input; drain it first.
            uncompress = self._decompressor.decompress(b"", size)

        if self._decompressor.unused_data != b"":
            # Prepend the already read bytes to the fileobj so they can
            # be seen by _read_eof() and _read_gzip_header()
            self._fp.prepend(self._decompressor.unused_data)

        if uncompress != b"":
            break
        if input_exhausted:
            raise EOFError("Compressed file ended before the "
                           "end-of-stream marker was reached")

    self._add_read_data(uncompress)
    self._pos += len(uncompress)
    return uncompress
299+
244300
245301# Aliases for improved compatibility with CPython gzip module.
246302GzipFile = IGzipFile
@@ -382,7 +438,7 @@ def _argument_parser():
382438 # diminishing returns hit. _compression.BUFFER_SIZE = 8k. But 32K is about
383439 # ~6% faster.
384440 parser .add_argument ("-b" , "--buffer-size" ,
385- default = 32 * 1024 , type = int ,
441+ default = 128 * 1024 , type = int ,
386442 help = argparse .SUPPRESS )
387443 return parser
388444
@@ -418,6 +474,8 @@ def main():
418474 elif not args .compress and args .file is not None :
419475 out_file = io .open (base , "wb" )
420476
477+ global READ_BUFFER_SIZE
478+ READ_BUFFER_SIZE = args .buffer_size
421479 try :
422480 while True :
423481 block = in_file .read (args .buffer_size )
0 commit comments