@@ -116,6 +116,35 @@ def __exit__(self, exc_type, exc_val, exc_tb):
116
116
117
117
118
118
class ThreadedGzipWriter (io .RawIOBase ):
119
+ """
120
+ Write a gzip file using multiple threads.
121
+
122
+ This class is heavily inspired by pigz from Mark Adler
123
+ (https://github.com/madler/pigz). It works similarly.
124
+
125
+ Each thread gets its own input and output queue. Input blocks are handed
126
+ out round robin using an index. The writer thread reads from the output
127
+ queues in a round robin using an index. This way all the blocks will be
128
+ written to the output stream in order while still allowing independent
129
+ compression for each thread.
130
+
131
+ Writing to the ThreadedGzipWriter happens on the main thread in an
132
+ io.BufferedWriter. The BufferedWriter will offer a memoryview of its
133
+ buffer. Using the bytes constructor this is made into an immutable block of
134
+ data.
135
+
136
+ A reference to the previous block is used to create a memoryview of the
137
+ last 32k of that block. This is used as a dictionary for the compression
138
+ allowing for better compression ratios.
139
+
140
+ The current block and the dictionary are pushed into an input queue. They
141
+ are picked up by a compression worker that calculates the crc32, the
142
+ length of the data and compresses the block. The compressed block, checksum
143
+ and length are pushed into an output queue.
144
+
145
+ The writer thread reads from output queues and uses the crc32_combine
146
+ function to calculate the total crc. It also writes the compressed block.
147
+ """
119
148
def __init__ (self ,
120
149
fp : BinaryIO ,
121
150
level : int = isal_zlib .ISAL_DEFAULT_COMPRESSION ,
0 commit comments