1
1
import io
2
+ import multiprocessing
3
+ import os
2
4
import queue
3
5
import struct
4
6
import threading
5
- import typing
7
+ from typing import BinaryIO , List , Tuple
6
8
7
9
from . import igzip , isal_zlib
8
10
9
11
DEFLATE_WINDOW_SIZE = 2 ** 15
10
12
13
+
11
14
def open(filename, mode="rb", compresslevel=igzip._COMPRESS_LEVEL_TRADEOFF,
         encoding=None, errors=None, newline=None, *, threads=-1):
    """
    Open a gzip file for reading or writing, optionally using threads.

    :param filename: a str, bytes or path-like object naming the file to
                     open, or an already opened file object (anything that
                     has a ``read`` or ``write`` attribute).
    :param mode: the file mode. A "t" requests text mode, in which case the
                 result is wrapped in an ``io.TextIOWrapper``. A mode that
                 contains "r" creates a reader; any other mode creates a
                 writer.
    :param compresslevel: compression level handed to the writer (or to
                          ``igzip.open`` when ``threads`` is 0).
    :param encoding: text-mode only, passed to ``io.TextIOWrapper``.
    :param errors: text-mode only, passed to ``io.TextIOWrapper``.
    :param newline: text-mode only, passed to ``io.TextIOWrapper``.
    :param threads: 0 delegates the whole call to ``igzip.open``. A negative
                    value is replaced by the number of CPUs usable by this
                    process (falling back to the machine's CPU count, then
                    to 1). The resulting count is passed to the writer.
    :return: a buffered binary file object, or a text wrapper around it.
    :raises ValueError: if ``mode`` contains both "t" and "b".
    :raises TypeError: if ``filename`` is neither a path nor a file object.
    """
    if threads == 0:
        return igzip.open(filename, mode, compresslevel, encoding, errors,
                          newline)
    if threads < 0:
        # Best-effort CPU detection. os.sched_getaffinity does not exist on
        # every platform and multiprocessing.cpu_count may raise
        # NotImplementedError. Catch Exception rather than using a bare
        # except so KeyboardInterrupt and SystemExit still propagate.
        try:
            threads = len(os.sched_getaffinity(0))
        except Exception:
            try:
                threads = multiprocessing.cpu_count()
            except Exception:
                threads = 1

    text_mode = "t" in mode
    if text_mode and "b" in mode:
        raise ValueError("Invalid mode: %r" % (mode,))

    # The underlying file must always be binary: drop a text flag and make
    # sure the binary flag is present, so that "r" and "w" behave like "rb"
    # and "wb" instead of opening the compressed file in text mode.
    open_mode = mode.replace("t", "")
    if "b" not in open_mode:
        open_mode += "b"

    opened_here = False
    if isinstance(filename, (str, bytes)) or hasattr(filename, "__fspath__"):
        binary_file = io.open(filename, open_mode)
        opened_here = True
    elif hasattr(filename, "read") or hasattr(filename, "write"):
        binary_file = filename
    else:
        raise TypeError("filename must be a str or bytes object, or a file")

    try:
        if "r" in mode:
            gzip_file = io.BufferedReader(ThreadedGzipReader(binary_file))
        else:
            gzip_file = io.BufferedWriter(
                ThreadedGzipWriter(binary_file, compresslevel, threads),
                buffer_size=128 * 1024
            )
        if text_mode:
            return io.TextIOWrapper(gzip_file, encoding, errors, newline)
        return gzip_file
    except Exception:
        # Do not leak a file handle this function opened itself. A file
        # object supplied by the caller stays under the caller's control.
        if opened_here:
            binary_file.close()
        raise
21
44
22
45
23
46
class ThreadedGzipReader (io .RawIOBase ):
@@ -86,21 +109,26 @@ def close(self) -> None:
86
109
self .fileobj .close ()
87
110
88
111
89
- class ThreadedWriter (io .RawIOBase ):
90
- def __init__ (self , fp : typing .BinaryIO , level : int = isal_zlib .ISAL_DEFAULT_COMPRESSION ,
91
- threads : int = 1 ,
92
- queue_size = 2 ):
112
+ class ThreadedGzipWriter (io .RawIOBase ):
113
+ def __init__ (self ,
114
+ fp : BinaryIO ,
115
+ level : int = isal_zlib .ISAL_DEFAULT_COMPRESSION ,
116
+ threads : int = 1 ,
117
+ queue_size : int = 2 ):
93
118
self .raw = fp
94
119
self .level = level
95
120
self .previous_block = b""
96
- self .crc_queue = queue .Queue (maxsize = threads * queue_size )
97
- self .input_queues = [queue .Queue (queue_size ) for _ in range (threads )]
98
- self .output_queues = [queue .Queue (queue_size ) for _ in range (threads )]
121
+ self .crc_queue : queue .Queue [bytes ] = queue .Queue (
122
+ maxsize = threads * queue_size )
123
+ self .input_queues : List [queue .Queue [Tuple [bytes , memoryview ]]] = [
124
+ queue .Queue (queue_size ) for _ in range (threads )]
125
+ self .output_queues : List [queue .Queue [bytes ]] = [
126
+ queue .Queue (queue_size ) for _ in range (threads )]
99
127
self .index = 0
100
128
self .threads = threads
101
- self ._crc = None
129
+ self ._crc = 0
102
130
self .running = False
103
- self ._size = None
131
+ self ._size = 0
104
132
self .crc_worker = threading .Thread (target = self ._calculate_crc )
105
133
self .output_worker = threading .Thread (target = self .write )
106
134
self .compression_workers = [
@@ -121,7 +149,7 @@ def _write_gzip_header(self):
121
149
os = 0xff
122
150
xfl = 4 if self .level == 0 else 0
123
151
self .raw .write (struct .pack (
124
- "BBBBIBB" , magic1 ,magic2 , method , flags , mtime , os , xfl ))
152
+ "BBBBIBB" , magic1 , magic2 , method , flags , mtime , os , xfl ))
125
153
126
154
def start (self ):
127
155
self .running = True
@@ -172,6 +200,7 @@ def close(self) -> None:
172
200
self .raw .close ()
173
201
self ._closed = True
174
202
203
+ @property
175
204
def closed (self ) -> bool :
176
205
return self ._closed
177
206
@@ -197,8 +226,10 @@ def _compress(self, index: int):
197
226
data , zdict = in_queue .get (timeout = 0.05 )
198
227
except queue .Empty :
199
228
continue
200
- compressor = isal_zlib .compressobj (self .level , wbits = - 15 , zdict = zdict )
201
- compressed = compressor .compress (data ) + compressor .flush (isal_zlib .Z_SYNC_FLUSH )
229
+ compressor = isal_zlib .compressobj (
230
+ self .level , wbits = - 15 , zdict = zdict )
231
+ compressed = compressor .compress (data ) + compressor .flush (
232
+ isal_zlib .Z_SYNC_FLUSH )
202
233
out_queue .put (compressed )
203
234
in_queue .task_done ()
204
235
0 commit comments