|
25 | 25 | import gzip
|
26 | 26 | import io
|
27 | 27 | import os
|
| 28 | +import struct |
28 | 29 | import sys
|
| 30 | +from typing import List |
29 | 31 |
|
30 | 32 | from . import isal_zlib
|
31 | 33 |
|
|
35 | 37 | _COMPRESS_LEVEL_TRADEOFF = isal_zlib.ISAL_DEFAULT_COMPRESSION
|
36 | 38 | _COMPRESS_LEVEL_BEST = isal_zlib.ISAL_BEST_COMPRESSION
|
37 | 39 |
|
| 40 | +FTEXT, FHCRC, FHEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 |
| 41 | + |
38 | 42 | try:
|
39 | 43 | BadGzipFile = gzip.BadGzipFile # type: ignore
|
40 | 44 | except AttributeError: # Versions lower than 3.8 do not have BadGzipFile
|
@@ -229,12 +233,53 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
|
229 | 233 | return buf.getvalue()
|
230 | 234 |
|
231 | 235 |
|
| 236 | +def _gzip_header_end(data: bytes) -> int: |
| 237 | + if len(data) < 10: |
| 238 | + raise ValueError("Gzip header should be 10 bytes or more") |
| 239 | + magic, method, flags, mtime, xfl, os = struct.unpack("<HBBIBB", data[:10], ) |
| 240 | + if magic != 0x8b1f: |
| 241 | + raise BadGzipFile(f"Not a gzipped file ({repr(data[:2])})") |
| 242 | + if method != 8: |
| 243 | + raise BadGzipFile("Unknown compression method") |
| 244 | + pos = 10 |
| 245 | + if flags & FHEXTRA: |
| 246 | + xlen = struct.unpack("<H", data[pos: pos+2]) |
| 247 | + pos += xlen |
| 248 | + if flags & FNAME: |
| 249 | + fname_end = data.index(b"\x00", pos) + 1 |
| 250 | + pos = fname_end |
| 251 | + if flags & FCOMMENT: |
| 252 | + fcomment_end = data.index(b"\x00", pos) + 1 |
| 253 | + pos = fcomment_end |
| 254 | + if flags & FHCRC: |
| 255 | + pos += 2 |
| 256 | + return pos |
| 257 | + |
| 258 | + |
def decompress(data):
    """Decompress a gzip compressed string in one shot.
    Return the decompressed string.

    *data* may hold several concatenated gzip members, optionally separated
    by null-byte padding; the decompressed members are joined in order.

    :param data: The complete gzip compressed data.
    :return: The decompressed data of all members.
    :raises EOFError: If a member ends before its end-of-stream marker or
        before its 8-byte trailer is complete.
    :raises BadGzipFile: If a member's header is invalid, or its CRC or
        length does not match the trailer.
    """
    all_blocks: List[bytes] = []
    while data != b"":
        header_end = _gzip_header_end(data)
        # Negative wbits: the payload is decompressed as a headerless
        # deflate stream, the gzip header having been skipped above.
        do = isal_zlib.decompressobj(-15)
        block = do.decompress(data[header_end:]) + do.flush()
        # Whatever follows the deflate stream must at least contain the
        # trailer: a 4-byte checksum plus a 4-byte length.
        if not do.eof or len(do.unused_data) < 8:
            raise EOFError("Compressed file ended before the end-of-stream "
                           "marker was reached")
        checksum, length = struct.unpack("<II", do.unused_data[:8])
        crc = isal_zlib.crc32(block)
        if crc != checksum:
            raise BadGzipFile("CRC check failed")
        # The trailer stores the uncompressed size modulo 2**32 (RFC 1952),
        # so mask the real length before comparing; otherwise members of
        # 4 GiB or more are wrongly rejected.
        if length != (len(block) & 0xFFFFFFFF):
            raise BadGzipFile("Incorrect length of data produced")
        all_blocks.append(block)
        # Remove all padding null bytes and start next block.
        data = do.unused_data[8:].lstrip(b"\x00")
    return b"".join(all_blocks)
238 | 283 |
|
239 | 284 |
|
240 | 285 | def main():
|
|
0 commit comments