Skip to content

Commit 32a2d15

Browse files
committed
Remove old bgzip detection code
1 parent d8a0376 commit 32a2d15

File tree

1 file changed

+8
-26
lines changed

1 file changed

+8
-26
lines changed

src/isal/igzip.py

Lines changed: 8 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -220,22 +220,6 @@ def write(self, data):
220220
return length
221221

222222

223-
def detect_bgzip(header: bytes) -> bool:
224-
if len(header) < 18:
225-
return False
226-
magic, method, flags, mtime, xfl, os, xlen, si1, si2, slen, bsize = \
227-
struct.unpack("<HBBIBBHBBHH", header[:18])
228-
return (
229-
method == 8 and # Deflate method used
230-
flags & 4 and # There are extra fields
231-
xlen == 6 and # The extra field should be of length 6
232-
si1 == 66 and # BGZIP magic number one
233-
si2 == 67 and # BGZIP magic number two
234-
slen == 2 # The length of the 16 bit integer that stores
235-
# the size of the block
236-
)
237-
238-
239223
def _read_exact(fp, n):
240224
'''Read exactly *n* bytes from `fp`
241225
@@ -255,7 +239,9 @@ def _read_exact(fp, n):
255239
def _read_gzip_header(fp):
256240
'''Read a gzip header from `fp` and progress to the end of the header.
257241
258-
Returns last mtime if header was present or None otherwise.
242+
Returns None if header not present. Parses mtime from the header, looks
243+
for BGZF format blocks and parses the block size, setting it to None if
244+
not present. Returns a tuple of mtime, block_size if a header was present.
259245
'''
260246
# Do not use read_exact because a header may not be present. Read twice
261247
# since fp might be unbuffered.
@@ -340,20 +326,16 @@ def __init__(self, fp):
340326
self._new_member = True
341327
self._last_mtime = None
342328
self._read_buffer_size = READ_BUFFER_SIZE
343-
if hasattr(fp, "peek") and detect_bgzip(fp.peek(18)):
344-
# bgzip consists of puny little blocks of max 64K uncompressed data
345-
# so in practice probably more around 16K in compressed size. A
346-
# 128K buffer is a massive overshoot and slows down the
347-
# decompression.
348-
# bgzip stores the block size, so it can be unpacked more
349-
# efficiently but this is outside scope for python-isal.
350-
self._read_buffer_size = 16 * 1024
351329

352330
def _read_gzip_header(self):
353331
header_info = _read_gzip_header(self._fp)
354332
if header_info is None:
355333
return False
356-
# Get the BGZF block size from the header if present
334+
# Get the BGZF block size from the header if present. If the read
335+
# buffer size is set to exactly the block size, there will be less
336+
# overhead as reading the file will stop right before the gzip trailer.
337+
# On normal gzip files nothing happens and this optimization is not
338+
# detrimental.
357339
last_mtime, block_size = header_info
358340
self._last_mtime = last_mtime
359341
self._read_buffer_size = (block_size if block_size is not None

0 commit comments

Comments
 (0)