@@ -270,14 +270,21 @@ def _read_gzip_header(fp):
270270 (method , flag , last_mtime ) = struct .unpack ("<BBIxx" , common_fields )
271271 if method != 8 :
272272 raise BadGzipFile ('Unknown compression method' )
273+ block_size = None
273274 if not flag : # Likely when data compressed in memory
274- return last_mtime
275+ return last_mtime , block_size
275276 header = magic + common_fields
276277 if flag & FEXTRA :
277278 # Read & discard the extra field, if present
278279 encoded_length = _read_exact (fp , 2 )
279280 extra_len , = struct .unpack ("<H" , encoded_length )
280281 extra_field = _read_exact (fp , extra_len )
282+ # Bgzip file detection
283+ if extra_len == 6 :
284+ s1 , s2 , slen , bsize = struct .unpack ("<BBHH" , extra_field )
285+ if s1 == 66 and s2 == 67 and slen == 2 :
286+ # Bgzip magic and correct slen.
287+ block_size = bsize
281288 header = header + encoded_length + extra_field
282289 if flag & FNAME :
283290 # Read and discard a null-terminated string containing the filename
@@ -300,7 +307,7 @@ def _read_gzip_header(fp):
300307 if header_crc != crc :
301308 raise BadGzipFile (f"Corrupted gzip header. Checksums do not "
302309 f"match: { crc :04x} != { header_crc :04x} " )
303- return last_mtime
310+ return last_mtime , block_size
304311
305312
306313class _PaddedFile (gzip ._PaddedFile ):
@@ -343,10 +350,14 @@ def __init__(self, fp):
343350 self ._read_buffer_size = 16 * 1024
344351
345352 def _read_gzip_header (self ):
346- last_mtime = _read_gzip_header (self ._fp )
347- if last_mtime is None :
353+ header_info = _read_gzip_header (self ._fp )
354+ if header_info is None :
348355 return False
356+ # Get the BGZF block size from the header if present
357+ last_mtime , block_size = header_info
349358 self ._last_mtime = last_mtime
359+ self ._read_buffer_size = (block_size if block_size is not None
360+ else READ_BUFFER_SIZE )
350361 return True
351362
352363 def read (self , size = - 1 ):
0 commit comments