Skip to content

Commit 440949b

Browse files
authored
Merge pull request #215 from pycompression/release_1.7.1
Release 1.7.1
2 parents 2f2b749 + 63f3bcd commit 440949b

File tree

6 files changed

+78
-32
lines changed

6 files changed

+78
-32
lines changed

.github/release_checklist.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ Release checklist
99
from CHANGELOG.rst.
1010
- [ ] Push tag to remote. This triggers the wheel/sdist build on github CI.
1111
- [ ] merge `main` branch back into `develop`.
12-
- [ ] Add updated version number to develop. (`setup.py` and `src/isal/__init__.py`)
1312
- [ ] Build the new tag on readthedocs. Only build the last patch version of
1413
each minor version. So `1.1.1` and `1.2.0` but not `1.1.0`, `1.1.1` and `1.2.0`.
1514
- [ ] Create a new release on github.

CHANGELOG.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@ Changelog
77
.. This document is user facing. Please word the changes in such a way
88
.. that users understand how the changes affect the new version.
99
10+
version 1.7.1
11+
-----------------
12+
+ Fix a bug where flushing files when writing in threaded mode did not work
13+
properly.
14+
+ Prevent threaded opening from blocking Python exit when an error is thrown
15+
in the calling thread.
16+
1017
version 1.7.0
1118
-----------------
1219
+ Include a patched ISA-L version 2.31. The applied patches make compilation

requirements-docs.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
sphinx
22
sphinx-rtd-theme
3-
# See https://github.com/sphinx-doc/sphinx-argparse/issues/56
4-
sphinx-argparse <0.5.0
3+
sphinx-argparse

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,7 @@ def build_isa_l():
166166
"Programming Language :: Python :: 3.10",
167167
"Programming Language :: Python :: 3.11",
168168
"Programming Language :: Python :: 3.12",
169+
"Programming Language :: Python :: 3.13",
169170
"Programming Language :: Python :: Implementation :: CPython",
170171
"Programming Language :: Python :: Implementation :: PyPy",
171172
"Programming Language :: C",

src/isal/igzip_threaded.py

Lines changed: 32 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def open(filename, mode="rb", compresslevel=igzip._COMPRESS_LEVEL_TRADEOFF,
6060
gzip_file = io.BufferedReader(
6161
_ThreadedGzipReader(filename, block_size=block_size))
6262
else:
63-
gzip_file = io.BufferedWriter(
63+
gzip_file = FlushableBufferedWriter(
6464
_ThreadedGzipWriter(
6565
filename,
6666
mode.replace("t", "b"),
@@ -101,6 +101,7 @@ def __init__(self, filename, queue_size=2, block_size=1024 * 1024):
101101
self.worker = threading.Thread(target=self._decompress)
102102
self._closed = False
103103
self.running = True
104+
self._calling_thread = threading.current_thread()
104105
self.worker.start()
105106

106107
def _check_closed(self, msg=None):
@@ -110,15 +111,15 @@ def _check_closed(self, msg=None):
110111
def _decompress(self):
111112
block_size = self.block_size
112113
block_queue = self.queue
113-
while self.running:
114+
while self.running and self._calling_thread.is_alive():
114115
try:
115116
data = self.fileobj.read(block_size)
116117
except Exception as e:
117118
self.exception = e
118119
return
119120
if not data:
120121
return
121-
while self.running:
122+
while self.running and self._calling_thread.is_alive():
122123
try:
123124
block_queue.put(data, timeout=0.05)
124125
break
@@ -166,6 +167,12 @@ def closed(self) -> bool:
166167
return self._closed
167168

168169

170+
class FlushableBufferedWriter(io.BufferedWriter):
171+
def flush(self):
172+
super().flush()
173+
self.raw.flush()
174+
175+
169176
class _ThreadedGzipWriter(io.RawIOBase):
170177
"""
171178
Write a gzip file using multiple threads.
@@ -215,6 +222,7 @@ def __init__(self,
215222
if "b" not in mode:
216223
mode += "b"
217224
self.lock = threading.Lock()
225+
self._calling_thread = threading.current_thread()
218226
self.exception: Optional[Exception] = None
219227
self.level = level
220228
self.previous_block = b""
@@ -308,30 +316,35 @@ def write(self, b) -> int:
308316
self.input_queues[worker_index].put((data, zdict))
309317
return len(data)
310318

311-
def flush(self):
319+
def _end_gzip_stream(self):
312320
self._check_closed()
313321
# Wait for all data to be compressed
314322
for in_q in self.input_queues:
315323
in_q.join()
316324
# Wait for all data to be written
317325
for out_q in self.output_queues:
318326
out_q.join()
327+
# Write an empty deflate block with a last block marker.
328+
self.raw.write(isal_zlib.compress(b"", wbits=-15))
329+
trailer = struct.pack("<II", self._crc, self._size & 0xFFFFFFFF)
330+
self.raw.write(trailer)
331+
self._crc = 0
332+
self._size = 0
319333
self.raw.flush()
320334

335+
def flush(self):
336+
self._end_gzip_stream()
337+
self._write_gzip_header()
338+
321339
def close(self) -> None:
322340
if self._closed:
323341
return
324-
self.flush()
342+
self._end_gzip_stream()
325343
self.stop()
326344
if self.exception:
327345
self.raw.close()
328346
self._closed = True
329347
raise self.exception
330-
# Write an empty deflate block with a last block marker.
331-
self.raw.write(isal_zlib.compress(b"", wbits=-15))
332-
trailer = struct.pack("<II", self._crc, self._size & 0xFFFFFFFF)
333-
self.raw.write(trailer)
334-
self.raw.flush()
335348
if self.closefd:
336349
self.raw.close()
337350
self._closed = True
@@ -348,7 +361,7 @@ def _compress(self, index: int):
348361
try:
349362
data, zdict = in_queue.get(timeout=0.05)
350363
except queue.Empty:
351-
if not self.running:
364+
if not (self.running and self._calling_thread.is_alive()):
352365
return
353366
continue
354367
try:
@@ -364,41 +377,31 @@ def _compress(self, index: int):
364377
def _write(self):
365378
index = 0
366379
output_queues = self.output_queues
367-
fp = self.raw
368-
total_crc = 0
369-
size = 0
370380
while True:
371381
out_index = index % self.threads
372382
output_queue = output_queues[out_index]
373383
try:
374384
compressed, crc, data_length = output_queue.get(timeout=0.05)
375385
except queue.Empty:
376-
if not self.running:
377-
self._crc = total_crc
378-
self._size = size
386+
if not (self.running and self._calling_thread.is_alive()):
379387
return
380388
continue
381-
total_crc = isal_zlib.crc32_combine(total_crc, crc, data_length)
382-
size += data_length
383-
fp.write(compressed)
389+
self._crc = isal_zlib.crc32_combine(self._crc, crc, data_length)
390+
self._size += data_length
391+
self.raw.write(compressed)
384392
output_queue.task_done()
385393
index += 1
386394

387395
def _compress_and_write(self):
388396
if not self.threads == 1:
389397
raise SystemError("Compress_and_write is for one thread only")
390-
fp = self.raw
391-
total_crc = 0
392-
size = 0
393398
in_queue = self.input_queues[0]
394399
compressor = self.compressors[0]
395400
while True:
396401
try:
397402
data, zdict = in_queue.get(timeout=0.05)
398403
except queue.Empty:
399-
if not self.running:
400-
self._crc = total_crc
401-
self._size = size
404+
if not (self.running and self._calling_thread.is_alive()):
402405
return
403406
continue
404407
try:
@@ -408,9 +411,9 @@ def _compress_and_write(self):
408411
self._set_error_and_empty_queue(e, in_queue)
409412
return
410413
data_length = len(data)
411-
total_crc = isal_zlib.crc32_combine(total_crc, crc, data_length)
412-
size += data_length
413-
fp.write(compressed)
414+
self._crc = isal_zlib.crc32_combine(self._crc, crc, data_length)
415+
self._size += data_length
416+
self.raw.write(compressed)
414417
in_queue.task_done()
415418

416419
def _set_error_and_empty_queue(self, error, q):

tests/test_igzip_threaded.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import io
1010
import itertools
1111
import os
12+
import subprocess
13+
import sys
1214
import tempfile
1315
from pathlib import Path
1416

@@ -218,3 +220,38 @@ def test_threaded_writer_does_not_close_stream():
218220
assert not test_stream.closed
219221
test_stream.seek(0)
220222
assert gzip.decompress(test_stream.read()) == b"thisisatest"
223+
224+
225+
@pytest.mark.timeout(5)
226+
@pytest.mark.parametrize(
227+
["mode", "threads"], itertools.product(["rb", "wb"], [1, 2]))
228+
def test_threaded_program_can_exit_on_error(tmp_path, mode, threads):
229+
program = tmp_path / "no_context_manager.py"
230+
test_file = tmp_path / "output.gz"
231+
# Write 40 MiB input data to saturate read buffer. Because of the repetitive
232+
# nature the resulting gzip file is very small (~40 KiB).
233+
test_file.write_bytes(gzip.compress(b"test" * (10 * 1024 * 1024)))
234+
with open(program, "wt") as f:
235+
f.write("from isal import igzip_threaded\n")
236+
f.write(
237+
f"f = igzip_threaded.open('{test_file}', "
238+
f"mode='{mode}', threads={threads})\n"
239+
)
240+
f.write("raise Exception('Error')\n")
241+
subprocess.run([sys.executable, str(program)])
242+
243+
244+
@pytest.mark.parametrize("threads", [1, 2])
245+
def test_flush(tmp_path, threads):
246+
test_file = tmp_path / "output.gz"
247+
with igzip_threaded.open(test_file, "wb", threads=threads) as f:
248+
f.write(b"1")
249+
f.flush()
250+
assert gzip.decompress(test_file.read_bytes()) == b"1"
251+
f.write(b"2")
252+
f.flush()
253+
assert gzip.decompress(test_file.read_bytes()) == b"12"
254+
f.write(b"3")
255+
f.flush()
256+
assert gzip.decompress(test_file.read_bytes()) == b"123"
257+
assert gzip.decompress(test_file.read_bytes()) == b"123"

0 commit comments

Comments
 (0)