|
28 | 28 | import _compression # noqa: I201 # Not third-party
|
29 | 29 |
|
30 | 30 | from . import zlib_ng
|
| 31 | +from .zlib_ng import _GzipReader |
31 | 32 |
|
32 | 33 | __all__ = ["GzipFile", "open", "compress", "decompress", "BadGzipFile",
|
33 | 34 | "READ_BUFFER_SIZE"]
|
|
36 | 37 | _COMPRESS_LEVEL_TRADEOFF = zlib_ng.Z_DEFAULT_COMPRESSION
|
37 | 38 | _COMPRESS_LEVEL_BEST = zlib_ng.Z_BEST_COMPRESSION
|
38 | 39 |
|
39 |
| -#: The amount of data that is read in at once when decompressing a file. |
40 |
| -#: Increasing this value may increase performance. |
41 |
| -#: 128K is also the size used by pigz and cat to read files from the |
42 |
| -# filesystem. |
43 |
| -READ_BUFFER_SIZE = 128 * 1024 |
| 40 | +# The amount of data that is read in at once when decompressing a file. |
| 41 | +# Increasing this value may increase performance. |
| 42 | +READ_BUFFER_SIZE = 512 * 1024 |
44 | 43 |
|
45 | 44 | FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
|
46 | 45 | READ, WRITE = 1, 2
|
47 | 46 |
|
48 |
| -try: |
49 |
| - BadGzipFile = gzip.BadGzipFile # type: ignore |
50 |
| -except AttributeError: # Versions lower than 3.8 do not have BadGzipFile |
51 |
| - BadGzipFile = OSError # type: ignore |
| 47 | +BadGzipFile = gzip.BadGzipFile # type: ignore |
52 | 48 |
|
53 | 49 |
|
54 | 50 | # The open method was copied from the CPython source with minor adjustments.
|
@@ -149,7 +145,7 @@ def __init__(self, filename=None, mode=None,
|
149 | 145 | zlib_ng.DEF_MEM_LEVEL,
|
150 | 146 | 0)
|
151 | 147 | if self.mode == READ:
|
152 |
| - raw = _GzipNGReader(self.fileobj) |
| 148 | + raw = _GzipReader(self.fileobj, READ_BUFFER_SIZE) |
153 | 149 | self._buffer = io.BufferedReader(raw)
|
154 | 150 |
|
155 | 151 | def __repr__(self):
|
@@ -180,73 +176,9 @@ def write(self, data):
|
180 | 176 | return length
|
181 | 177 |
|
182 | 178 |
|
183 |
| -class _GzipNGReader(gzip._GzipReader): |
184 |
| - def __init__(self, fp): |
185 |
| - # Call the init method of gzip._GzipReader's parent here. |
186 |
| - # It is not very invasive and allows us to override _PaddedFile |
187 |
| - _compression.DecompressReader.__init__( |
188 |
| - self, gzip._PaddedFile(fp), zlib_ng._ZlibDecompressor, |
189 |
| - wbits=-zlib_ng.MAX_WBITS) |
190 |
| - # Set flag indicating start of a new member |
191 |
| - self._new_member = True |
192 |
| - self._last_mtime = None |
193 |
| - |
194 |
| - def read(self, size=-1): |
195 |
| - if size < 0: |
196 |
| - return self.readall() |
197 |
| - # size=0 is special because decompress(max_length=0) is not supported |
198 |
| - if not size: |
199 |
| - return b"" |
200 |
| - |
201 |
| - # For certain input data, a single |
202 |
| - # call to decompress() may not return |
203 |
| - # any data. In this case, retry until we get some data or reach EOF. |
204 |
| - while True: |
205 |
| - if self._decompressor.eof: |
206 |
| - # Ending case: we've come to the end of a member in the file, |
207 |
| - # so finish up this member, and read a new gzip header. |
208 |
| - # Check the CRC and file size, and set the flag so we read |
209 |
| - # a new member |
210 |
| - self._read_eof() |
211 |
| - self._new_member = True |
212 |
| - self._decompressor = self._decomp_factory( |
213 |
| - **self._decomp_args) |
214 |
| - |
215 |
| - if self._new_member: |
216 |
| - # If the _new_member flag is set, we have to |
217 |
| - # jump to the next member, if there is one. |
218 |
| - self._init_read() |
219 |
| - if not self._read_gzip_header(): |
220 |
| - self._size = self._pos |
221 |
| - return b"" |
222 |
| - self._new_member = False |
223 |
| - |
224 |
| - # Read a chunk of data from the file |
225 |
| - if self._decompressor.needs_input: |
226 |
| - buf = self._fp.read(READ_BUFFER_SIZE) |
227 |
| - uncompress = self._decompressor.decompress(buf, size) |
228 |
| - else: |
229 |
| - uncompress = self._decompressor.decompress(b"", size) |
230 |
| - if self._decompressor.unused_data != b"": |
231 |
| - # Prepend the already read bytes to the fileobj so they can |
232 |
| - # be seen by _read_eof() and _read_gzip_header() |
233 |
| - self._fp.prepend(self._decompressor.unused_data) |
234 |
| - |
235 |
| - if uncompress != b"": |
236 |
| - break |
237 |
| - if buf == b"": |
238 |
| - raise EOFError("Compressed file ended before the " |
239 |
| - "end-of-stream marker was reached") |
240 |
| - |
241 |
| - self._crc = zlib_ng.crc32(uncompress, self._crc) |
242 |
| - self._stream_size += len(uncompress) |
243 |
| - self._pos += len(uncompress) |
244 |
| - return uncompress |
245 |
| - |
246 |
| - |
247 | 179 | # Aliases for improved compatibility with CPython gzip module.
|
248 | 180 | GzipFile = GzipNGFile
|
249 |
| -_GzipReader = _GzipNGReader |
| 181 | +_GzipNGReader = _GzipReader |
250 | 182 |
|
251 | 183 |
|
252 | 184 | def _read_exact(fp, n):
|
@@ -342,25 +274,9 @@ def decompress(data):
|
342 | 274 | """Decompress a gzip compressed string in one shot.
|
343 | 275 | Return the decompressed string.
|
344 | 276 | """
|
345 |
| - decompressed_members = [] |
346 |
| - while True: |
347 |
| - fp = io.BytesIO(data) |
348 |
| - if _read_gzip_header(fp) is None: |
349 |
| - return b"".join(decompressed_members) |
350 |
| - # Use a zlib raw deflate compressor |
351 |
| - do = zlib_ng.decompressobj(wbits=-zlib_ng.MAX_WBITS) |
352 |
| - # Read all the data except the header |
353 |
| - decompressed = do.decompress(data[fp.tell():]) |
354 |
| - if not do.eof or len(do.unused_data) < 8: |
355 |
| - raise EOFError("Compressed file ended before the end-of-stream " |
356 |
| - "marker was reached") |
357 |
| - crc, length = struct.unpack("<II", do.unused_data[:8]) |
358 |
| - if crc != zlib_ng.crc32(decompressed): |
359 |
| - raise BadGzipFile("CRC check failed") |
360 |
| - if length != (len(decompressed) & 0xffffffff): |
361 |
| - raise BadGzipFile("Incorrect length of data produced") |
362 |
| - decompressed_members.append(decompressed) |
363 |
| - data = do.unused_data[8:].lstrip(b"\x00") |
| 277 | + fp = io.BytesIO(data) |
| 278 | + reader = _GzipReader(fp, max(len(data), 16)) |
| 279 | + return reader.readall() |
364 | 280 |
|
365 | 281 |
|
366 | 282 | def _argument_parser():
|
|
0 commit comments