Skip to content

Commit 2e5e4dc

Browse files
authored
Merge pull request #63 from pycompression/release_0.8.1
Release 0.8.1
2 parents 1d89ae7 + ee3fd78 commit 2e5e4dc

File tree

8 files changed

+55
-8
lines changed

8 files changed

+55
-8
lines changed

.github/workflows/ci.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,9 @@ jobs:
8181
run: sudo apt install yasm
8282
if: runner.os == 'Linux'
8383
- name: Install build dependencies (Macos)
84-
run: brew install nasm automake autoconf
84+
# Install yasm because nasm does not work when building wheels.
85+
# Probably because nasm-filter.sh does not filter out all flags that cannot be used.
86+
run: brew install yasm automake autoconf
8587
if: runner.os == 'macOS'
8688
- name: Set MSVC developer prompt
8789
uses: ilammy/[email protected]
@@ -141,7 +143,7 @@ jobs:
141143
- name: Install cibuildwheel twine wheel
142144
run: python -m pip install cibuildwheel twine wheel
143145
- name: Install build dependencies (Macos)
144-
run: brew install nasm automake autoconf
146+
run: brew install yasm automake autoconf
145147
if: runner.os == 'macOS'
146148
- name: Set MSVC developer prompt
147149
uses: ilammy/[email protected]

CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ Changelog
77
.. This document is user facing. Please word the changes in such a way
88
.. that users understand how the changes affect the new version.
99
10+
version 0.8.1
11+
-----------------
12+
+ Fix a bug where multi-member gzip files were read incorrectly due to an
13+
offset error. This was caused by ISA-L's decompressobj having a small
14+
bitbuffer which was not taken properly into account in some circumstances.
15+
1016
version 0.8.0
1117
-----------------
1218
+ Speed up ``igzip.compress`` and ``igzip.decompress`` by improving the

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def build_isa_l(compiler_command: str, compiler_options: str):
169169

170170
setup(
171171
name="isal",
172-
version="0.8.0",
172+
version="0.8.1",
173173
description="Faster zlib and gzip compatible compression and "
174174
"decompression by providing python bindings for the ISA-L "
175175
"library.",
@@ -197,7 +197,7 @@ def build_isa_l(compiler_command: str, compiler_options: str):
197197
"Programming Language :: Python :: 3.8",
198198
"Programming Language :: Python :: 3.9",
199199
"Programming Language :: Cython",
200-
"Development Status :: 3 - Alpha",
200+
"Development Status :: 4 - Beta",
201201
"Topic :: System :: Archiving :: Compression",
202202
"License :: OSI Approved :: MIT License",
203203
"Operating System :: POSIX :: Linux",

src/isal/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,4 @@
3939
"__version__"
4040
]
4141

42-
__version__ = "0.8.0"
42+
__version__ = "0.8.1"

src/isal/igzip.py

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import sys
3030
import time
3131
from typing import List, Optional, SupportsInt
32+
import _compression # noqa: I201 # Not third-party
3233

3334
from . import isal_zlib
3435

@@ -204,11 +205,35 @@ def write(self, data):
204205
return length
205206

206207

208+
class _PaddedFile(gzip._PaddedFile):
209+
# Override _PaddedFile from gzip as its prepend method assumes that
210+
# the prepended data is always read from its _buffer. Unfortunately in
211+
# isal_zlib.decompressobj there is a bitbuffer as well which may be added.
212+
# So an extra check is added to prepend to ensure no extra data in front
213+
# of the buffer was present. (Negative self._read).
214+
def prepend(self, prepend=b''):
215+
if self._read is not None:
216+
# Assume data was read since the last prepend() call
217+
self._read -= len(prepend)
218+
if self._read >= 0:
219+
return
220+
# If self._read is negative the data was read further back and
221+
# the buffer needs to be reset.
222+
self._buffer = prepend
223+
self._length = len(self._buffer)
224+
self._read = 0
225+
226+
207227
class _IGzipReader(gzip._GzipReader):
208228
def __init__(self, fp):
209-
super().__init__(fp)
210-
self._decomp_factory = isal_zlib.decompressobj
211-
self._decompressor = self._decomp_factory(**self._decomp_args)
229+
# Call the init method of gzip._GzipReader's parent here.
230+
# It is not very invasive and allows us to override _PaddedFile
231+
_compression.DecompressReader.__init__(
232+
self, _PaddedFile(fp), isal_zlib.decompressobj,
233+
wbits=-isal_zlib.MAX_WBITS)
234+
# Set flag indicating start of a new member
235+
self._new_member = True
236+
self._last_mtime = None
212237

213238
def _add_read_data(self, data):
214239
# Use faster isal crc32 calculation and update the stream size in place

tests/data/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,9 @@ dwgsim -N 10000 -e 0.0010 -E 0.0010 -1 152 -2 152 -c 0 -S1 -z 1 -Q 5 \
1212
reference.fasta test
1313
gzip -c -9 test.bwa.read1.fastq > test.fastq.gz
1414
```
15+
16+
concatenated.fastq.gz was created with:
17+
```
18+
gzip -cd tests/data/test.fastq.gz | head -n 1000 | gzip -c -1 >> concatenated.fastq.gz
19+
gzip -cd tests/data/test.fastq.gz | head -n 1000 | gzip -c -1 >> concatenated.fastq.gz
20+
```

tests/data/concatenated.fastq.gz

90.5 KB
Binary file not shown.

tests/test_igzip.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,3 +288,11 @@ def test_header_corrupt():
288288
def test_truncated_header(trunc):
289289
with pytest.raises(EOFError):
290290
igzip.decompress(trunc)
291+
292+
293+
def test_concatenated_gzip():
294+
concat = Path(__file__).parent / "data" / "concatenated.fastq.gz"
295+
data = gzip.decompress(concat.read_bytes())
296+
with igzip.open(concat, "rb") as igzip_h:
297+
result = igzip_h.read()
298+
assert data == result

0 commit comments

Comments
 (0)