@@ -220,6 +220,22 @@ def write(self, data):
220220 return length
221221
222222
def detect_bgzip(header: bytes) -> bool:
    """Return True if *header* looks like the start of a BGZF (bgzip) block.

    A BGZF block is a regular gzip member whose header carries exactly one
    extra subfield, identified by the bytes ``B`` and ``C``, holding a
    16-bit block size. Only the first 18 bytes are inspected.

    :param header: The first bytes of the stream, e.g. the result of
                   ``fp.peek(18)``. May be shorter than 18 bytes.
    :return: ``True`` when all fixed BGZF header fields match, ``False``
             otherwise (including when fewer than 18 bytes are supplied).
    """
    if len(header) < 18:
        return False
    # Fields that are not inspected get a leading underscore; this also
    # avoids shadowing the stdlib ``os`` module name.
    (magic, method, flags, _mtime, _xfl, _os,
     xlen, si1, si2, slen, _bsize) = struct.unpack(
        "<HBBIBBHBBHH", header[:18])
    return (
        magic == 0x8B1F  # gzip ID1=0x1f, ID2=0x8b read as little-endian
        and method == 8  # Deflate method used
        # bool() so a cleared bit yields False instead of the int 0.
        and bool(flags & 4)  # There are extra fields
        and xlen == 6  # The extra field should be of length 6
        and si1 == 66  # BGZIP magic number one ("B")
        and si2 == 67  # BGZIP magic number two ("C")
        # The length of the 16 bit integer that stores the size of the
        # block
        and slen == 2
    )
237+
238+
223239class _PaddedFile (gzip ._PaddedFile ):
224240 # Overwrite _PaddedFile from gzip as its prepend method assumes that
225241 # the prepended data is always read from its _buffer. Unfortunately in
@@ -249,6 +265,15 @@ def __init__(self, fp):
249265 # Set flag indicating start of a new member
250266 self ._new_member = True
251267 self ._last_mtime = None
268+ self ._read_buffer_size = READ_BUFFER_SIZE
269+ if hasattr (fp , "peek" ) and detect_bgzip (fp .peek (18 )):
270+ # bgzip consists of puny little blocks of max 64K uncompressed data
271+ # so in practice probably more around 16K in compressed size. A
272+ # 128K buffer is a massive overshoot and slows down the
273+ # decompression.
274+ # bgzip stores the block size, so it can be unpacked more
275+ # efficiently but this is outside scope for python-isal.
276+ self ._read_buffer_size = 16 * 1024
252277
253278 def read (self , size = - 1 ):
254279 if size < 0 :
@@ -282,7 +307,7 @@ def read(self, size=-1):
282307
283308 # Read a chunk of data from the file
284309 if self ._decompressor .needs_input :
285- buf = self ._fp .read (READ_BUFFER_SIZE )
310+ buf = self ._fp .read (self . _read_buffer_size )
286311 uncompress = self ._decompressor .decompress (buf , size )
287312 else :
288313 uncompress = self ._decompressor .decompress (b"" , size )
0 commit comments