@@ -220,6 +220,22 @@ def write(self, data):
220
220
return length
221
221
222
222
223
def detect_bgzip(header: bytes) -> bool:
    """Return True if *header* looks like the start of a BGZF (bgzip) block.

    Only the first 18 bytes are inspected: the fixed 10-byte gzip member
    header, the 2-byte XLEN field and the first 6-byte extra subfield.

    :param header: the leading bytes of the stream; may be shorter than
                   18 bytes, in which case the result is False.
    :return: True only when the gzip magic is present, the compression
             method is deflate, the FEXTRA flag is set, the extra field is
             exactly 6 bytes long and its subfield is the 'BC' subfield
             with a 2-byte payload.
    """
    if len(header) < 18:
        return False
    # Little-endian layout: ID1+ID2, CM, FLG, MTIME, XFL, OS, XLEN,
    # SI1, SI2, SLEN, BSIZE. The underscore-prefixed fields are unpacked
    # only to keep the offsets aligned; they play no part in detection.
    (magic, method, flags, _mtime, _xfl, _os_id, xlen,
     si1, si2, slen, _bsize) = struct.unpack("<HBBIBBHBBHH", header[:18])
    return (
        magic == 0x8B1F  # gzip magic bytes 0x1f 0x8b, read little-endian
        and method == 8  # Deflate method used
        # Explicit bool() so that a header without extra fields yields
        # False rather than the integer 0.
        and bool(flags & 4)  # There are extra fields
        and xlen == 6  # The extra field should be of length 6
        and si1 == 66  # BGZIP magic number one ('B')
        and si2 == 67  # BGZIP magic number two ('C')
        and slen == 2  # The length of the 16 bit integer that stores
        # the size of the block
    )
237
+
238
+
223
239
class _PaddedFile (gzip ._PaddedFile ):
224
240
# Overwrite _PaddedFile from gzip as its prepend method assumes that
225
241
# the prepended data is always read from its _buffer. Unfortunately in
@@ -249,6 +265,15 @@ def __init__(self, fp):
249
265
# Set flag indicating start of a new member
250
266
self ._new_member = True
251
267
self ._last_mtime = None
268
+ self ._read_buffer_size = READ_BUFFER_SIZE
269
+ if hasattr (fp , "peek" ) and detect_bgzip (fp .peek (18 )):
270
+ # bgzip consists of puny little blocks of max 64K uncompressed data
271
+ # so in practice probably more around 16K in compressed size. A
272
+ # 128K buffer is a massive overshoot and slows down the
273
+ # decompression.
274
+ # bgzip stores the block size, so it can be unpacked more
275
+ # efficiently but this is outside scope for python-isal.
276
+ self ._read_buffer_size = 16 * 1024
252
277
253
278
def read (self , size = - 1 ):
254
279
if size < 0 :
@@ -282,7 +307,7 @@ def read(self, size=-1):
282
307
283
308
# Read a chunk of data from the file
284
309
if self ._decompressor .needs_input :
285
- buf = self ._fp .read (READ_BUFFER_SIZE )
310
+ buf = self ._fp .read (self . _read_buffer_size )
286
311
uncompress = self ._decompressor .decompress (buf , size )
287
312
else :
288
313
uncompress = self ._decompressor .decompress (b"" , size )
0 commit comments