@@ -60,7 +60,7 @@ def open(filename, mode="rb", compresslevel=igzip._COMPRESS_LEVEL_TRADEOFF,
60
60
gzip_file = io .BufferedReader (
61
61
_ThreadedGzipReader (filename , block_size = block_size ))
62
62
else :
63
- gzip_file = io . BufferedWriter (
63
+ gzip_file = FlushableBufferedWriter (
64
64
_ThreadedGzipWriter (
65
65
filename ,
66
66
mode .replace ("t" , "b" ),
@@ -101,6 +101,7 @@ def __init__(self, filename, queue_size=2, block_size=1024 * 1024):
101
101
self .worker = threading .Thread (target = self ._decompress )
102
102
self ._closed = False
103
103
self .running = True
104
+ self ._calling_thread = threading .current_thread ()
104
105
self .worker .start ()
105
106
106
107
def _check_closed (self , msg = None ):
@@ -110,15 +111,15 @@ def _check_closed(self, msg=None):
110
111
def _decompress (self ):
111
112
block_size = self .block_size
112
113
block_queue = self .queue
113
- while self .running :
114
+ while self .running and self . _calling_thread . is_alive () :
114
115
try :
115
116
data = self .fileobj .read (block_size )
116
117
except Exception as e :
117
118
self .exception = e
118
119
return
119
120
if not data :
120
121
return
121
- while self .running :
122
+ while self .running and self . _calling_thread . is_alive () :
122
123
try :
123
124
block_queue .put (data , timeout = 0.05 )
124
125
break
@@ -166,6 +167,12 @@ def closed(self) -> bool:
166
167
return self ._closed
167
168
168
169
170
+ class FlushableBufferedWriter (io .BufferedWriter ):
171
+ def flush (self ):
172
+ super ().flush ()
173
+ self .raw .flush ()
174
+
175
+
169
176
class _ThreadedGzipWriter (io .RawIOBase ):
170
177
"""
171
178
Write a gzip file using multiple threads.
@@ -215,6 +222,7 @@ def __init__(self,
215
222
if "b" not in mode :
216
223
mode += "b"
217
224
self .lock = threading .Lock ()
225
+ self ._calling_thread = threading .current_thread ()
218
226
self .exception : Optional [Exception ] = None
219
227
self .level = level
220
228
self .previous_block = b""
@@ -308,30 +316,35 @@ def write(self, b) -> int:
308
316
self .input_queues [worker_index ].put ((data , zdict ))
309
317
return len (data )
310
318
311
- def flush (self ):
319
+ def _end_gzip_stream (self ):
312
320
self ._check_closed ()
313
321
# Wait for all data to be compressed
314
322
for in_q in self .input_queues :
315
323
in_q .join ()
316
324
# Wait for all data to be written
317
325
for out_q in self .output_queues :
318
326
out_q .join ()
327
+ # Write an empty deflate block with a lost block marker.
328
+ self .raw .write (isal_zlib .compress (b"" , wbits = - 15 ))
329
+ trailer = struct .pack ("<II" , self ._crc , self ._size & 0xFFFFFFFF )
330
+ self .raw .write (trailer )
331
+ self ._crc = 0
332
+ self ._size = 0
319
333
self .raw .flush ()
320
334
335
+ def flush (self ):
336
+ self ._end_gzip_stream ()
337
+ self ._write_gzip_header ()
338
+
321
339
def close (self ) -> None :
322
340
if self ._closed :
323
341
return
324
- self .flush ()
342
+ self ._end_gzip_stream ()
325
343
self .stop ()
326
344
if self .exception :
327
345
self .raw .close ()
328
346
self ._closed = True
329
347
raise self .exception
330
- # Write an empty deflate block with a lost block marker.
331
- self .raw .write (isal_zlib .compress (b"" , wbits = - 15 ))
332
- trailer = struct .pack ("<II" , self ._crc , self ._size & 0xFFFFFFFF )
333
- self .raw .write (trailer )
334
- self .raw .flush ()
335
348
if self .closefd :
336
349
self .raw .close ()
337
350
self ._closed = True
@@ -348,7 +361,7 @@ def _compress(self, index: int):
348
361
try :
349
362
data , zdict = in_queue .get (timeout = 0.05 )
350
363
except queue .Empty :
351
- if not self .running :
364
+ if not ( self .running and self . _calling_thread . is_alive ()) :
352
365
return
353
366
continue
354
367
try :
@@ -364,41 +377,31 @@ def _compress(self, index: int):
364
377
def _write (self ):
365
378
index = 0
366
379
output_queues = self .output_queues
367
- fp = self .raw
368
- total_crc = 0
369
- size = 0
370
380
while True :
371
381
out_index = index % self .threads
372
382
output_queue = output_queues [out_index ]
373
383
try :
374
384
compressed , crc , data_length = output_queue .get (timeout = 0.05 )
375
385
except queue .Empty :
376
- if not self .running :
377
- self ._crc = total_crc
378
- self ._size = size
386
+ if not (self .running and self ._calling_thread .is_alive ()):
379
387
return
380
388
continue
381
- total_crc = isal_zlib .crc32_combine (total_crc , crc , data_length )
382
- size += data_length
383
- fp .write (compressed )
389
+ self . _crc = isal_zlib .crc32_combine (self . _crc , crc , data_length )
390
+ self . _size += data_length
391
+ self . raw .write (compressed )
384
392
output_queue .task_done ()
385
393
index += 1
386
394
387
395
def _compress_and_write (self ):
388
396
if not self .threads == 1 :
389
397
raise SystemError ("Compress_and_write is for one thread only" )
390
- fp = self .raw
391
- total_crc = 0
392
- size = 0
393
398
in_queue = self .input_queues [0 ]
394
399
compressor = self .compressors [0 ]
395
400
while True :
396
401
try :
397
402
data , zdict = in_queue .get (timeout = 0.05 )
398
403
except queue .Empty :
399
- if not self .running :
400
- self ._crc = total_crc
401
- self ._size = size
404
+ if not (self .running and self ._calling_thread .is_alive ()):
402
405
return
403
406
continue
404
407
try :
@@ -408,9 +411,9 @@ def _compress_and_write(self):
408
411
self ._set_error_and_empty_queue (e , in_queue )
409
412
return
410
413
data_length = len (data )
411
- total_crc = isal_zlib .crc32_combine (total_crc , crc , data_length )
412
- size += data_length
413
- fp .write (compressed )
414
+ self . _crc = isal_zlib .crc32_combine (self . _crc , crc , data_length )
415
+ self . _size += data_length
416
+ self . raw .write (compressed )
414
417
in_queue .task_done ()
415
418
416
419
def _set_error_and_empty_queue (self , error , q ):
0 commit comments