@@ -270,14 +270,21 @@ def _read_gzip_header(fp):
270
270
(method , flag , last_mtime ) = struct .unpack ("<BBIxx" , common_fields )
271
271
if method != 8 :
272
272
raise BadGzipFile ('Unknown compression method' )
273
+ block_size = None
273
274
if not flag : # Likely when data compressed in memory
274
- return last_mtime
275
+ return last_mtime , block_size
275
276
header = magic + common_fields
276
277
if flag & FEXTRA :
277
278
# Read the extra field, if present; it is only inspected for a BGZF subfield
278
279
encoded_length = _read_exact (fp , 2 )
279
280
extra_len , = struct .unpack ("<H" , encoded_length )
280
281
extra_field = _read_exact (fp , extra_len )
282
+ # Bgzip file detection
283
+ if extra_len == 6 :
284
+ s1 , s2 , slen , bsize = struct .unpack ("<BBHH" , extra_field )
285
+ if s1 == 66 and s2 == 67 and slen == 2 :
286
+ # Bgzip magic and correct slen.
287
+ block_size = bsize
281
288
header = header + encoded_length + extra_field
282
289
if flag & FNAME :
283
290
# Read and discard a null-terminated string containing the filename
@@ -300,7 +307,7 @@ def _read_gzip_header(fp):
300
307
if header_crc != crc :
301
308
raise BadGzipFile (f"Corrupted gzip header. Checksums do not "
302
309
f"match: { crc :04x} != { header_crc :04x} " )
303
- return last_mtime
310
+ return last_mtime , block_size
304
311
305
312
306
313
class _PaddedFile (gzip ._PaddedFile ):
@@ -343,10 +350,14 @@ def __init__(self, fp):
343
350
self ._read_buffer_size = 16 * 1024
344
351
345
352
def _read_gzip_header(self):
    """Read the next gzip member header and record what it reports.

    Delegates the parsing to the module-level ``_read_gzip_header`` and
    stores the result on this reader: the modification time goes to
    ``self._last_mtime`` and ``self._read_buffer_size`` is set to the BGZF
    block size when the header advertises one, or to ``READ_BUFFER_SIZE``
    otherwise.

    Returns:
        False when the module-level parser returns ``None`` (presumably
        end of input -- confirm against the parser), True once a header
        has been read and recorded.

    Raises:
        Whatever the module-level parser raises (it raises ``BadGzipFile``
        for an unknown compression method or a header checksum mismatch).
    """
    header_info = _read_gzip_header(self._fp)
    if header_info is None:
        return False
    last_mtime, block_size = header_info
    self._last_mtime = last_mtime
    # block_size is None for members without a BGZF subfield. It can also
    # be 0: the value is an unsigned 16-bit field taken unvalidated from
    # the file. A zero-sized read buffer presumably cannot make progress
    # (assumes the value is used as a read size -- confirm in read()), so
    # fall back to the default in both cases.
    self._read_buffer_size = block_size if block_size else READ_BUFFER_SIZE
    return True
351
362
352
363
def read (self , size = - 1 ):
0 commit comments