Skip to content

Commit d8a0376

Browse files
committed
Obtain BGZIP block size information from the header if present
1 parent 8c0591e commit d8a0376

File tree

1 file changed

+15
-4
lines changed

1 file changed

+15
-4
lines changed

src/isal/igzip.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -270,14 +270,21 @@ def _read_gzip_header(fp):
270270
(method, flag, last_mtime) = struct.unpack("<BBIxx", common_fields)
271271
if method != 8:
272272
raise BadGzipFile('Unknown compression method')
273+
block_size = None
273274
if not flag: # Likely when data compressed in memory
274-
return last_mtime
275+
return last_mtime, block_size
275276
header = magic + common_fields
276277
if flag & FEXTRA:
277278
# Read & discard the extra field, if present
278279
encoded_length = _read_exact(fp, 2)
279280
extra_len, = struct.unpack("<H", encoded_length)
280281
extra_field = _read_exact(fp, extra_len)
282+
# Bgzip file detection
283+
if extra_len == 6:
284+
s1, s2, slen, bsize = struct.unpack("<BBHH", extra_field)
285+
if s1 == 66 and s2 == 67 and slen == 2:
286+
# Bgzip magic and correct slen.
287+
block_size = bsize
281288
header = header + encoded_length + extra_field
282289
if flag & FNAME:
283290
# Read and discard a null-terminated string containing the filename
@@ -300,7 +307,7 @@ def _read_gzip_header(fp):
300307
if header_crc != crc:
301308
raise BadGzipFile(f"Corrupted gzip header. Checksums do not "
302309
f"match: {crc:04x} != {header_crc:04x}")
303-
return last_mtime
310+
return last_mtime, block_size
304311

305312

306313
class _PaddedFile(gzip._PaddedFile):
@@ -343,10 +350,14 @@ def __init__(self, fp):
343350
self._read_buffer_size = 16 * 1024
344351

345352
def _read_gzip_header(self):
346-
last_mtime = _read_gzip_header(self._fp)
347-
if last_mtime is None:
353+
header_info = _read_gzip_header(self._fp)
354+
if header_info is None:
348355
return False
356+
# Get the BGZF block size from the header if present
357+
last_mtime, block_size = header_info
349358
self._last_mtime = last_mtime
359+
self._read_buffer_size = (block_size if block_size is not None
360+
else READ_BUFFER_SIZE)
350361
return True
351362

352363
def read(self, size=-1):

0 commit comments

Comments
 (0)