28
28
import struct
29
29
import sys
30
30
import time
31
- from typing import List , Optional , SupportsInt
31
+ from typing import Optional , SupportsInt
32
32
import _compression # noqa: I201 # Not third-party
33
33
34
34
from . import igzip_lib , isal_zlib
try:
    BadGzipFile = gzip.BadGzipFile  # type: ignore
except AttributeError:  # Versions lower than 3.8 do not have BadGzipFile
    # Fall back to OSError so this module can raise and callers can catch
    # "BadGzipFile" under the same name on every Python version. Both
    # assignments are marked for the type checker because the name is bound
    # to two different classes.
    BadGzipFile = OSError  # type: ignore
53
53
54
54
55
55
# The open method was copied from the CPython source with minor adjustments.
@@ -349,11 +349,13 @@ def _gzip_header_end(data: bytes) -> int:
349
349
raise BadGzipFile (f"Not a gzipped file ({ repr (data [:2 ])} )" )
350
350
if method != 8 :
351
351
raise BadGzipFile ("Unknown compression method" )
352
+ if not flags : # Likely when data compressed in memory
353
+ return 10
352
354
pos = 10
353
355
if flags & FEXTRA :
354
356
if len (data ) < pos + 2 :
355
357
raise eof_error
356
- xlen = int . from_bytes ( data [pos : pos + 2 ], "little" , signed = False )
358
+ xlen , = struct . unpack ( "<H" , data [pos : pos + 2 ] )
357
359
pos += 2 + xlen
358
360
if flags & FNAME :
359
361
pos = data .find (b"\x00 " , pos ) + 1
@@ -367,12 +369,12 @@ def _gzip_header_end(data: bytes) -> int:
367
369
if flags & FHCRC :
368
370
if len (data ) < pos + 2 :
369
371
raise eof_error
370
- header_crc = int . from_bytes ( data [pos : pos + 2 ], "little" , signed = False )
372
+ header_crc , = struct . unpack ( "<H" , data [pos : pos + 2 ] )
371
373
# CRC is stored as a 16-bit integer by taking last bits of crc32.
372
374
crc = isal_zlib .crc32 (data [:pos ]) & 0xFFFF
373
375
if header_crc != crc :
374
- raise BadGzipFile (f"Corrupted header. Checksums do not "
375
- f"match: { crc } != { header_crc } " )
376
+ raise BadGzipFile (f"Corrupted gzip header. Checksums do not "
377
+ f"match: { crc :04x } != { header_crc :04x } " )
376
378
pos += 2
377
379
return pos
378
380
def decompress(data):
    """Decompress a gzip compressed string in one shot.

    Every gzip member contained in *data* is decompressed in turn and
    checked against the 8-byte trailer that follows it. Null bytes between
    members are treated as padding and skipped.

    Return the decompressed string. Empty input returns an empty
    bytestring.

    Raises EOFError when a member ends before its end-of-stream marker or
    before its complete trailer. Raises BadGzipFile when the stored CRC or
    the stored length does not match the decompressed data. Errors raised
    by _gzip_header_end for a malformed header are propagated unchanged.
    """
    decompressed_members = []
    while data:
        header_end = _gzip_header_end(data)
        # Use a raw deflate decompressor (negative wbits): the gzip header
        # has already been parsed above and the trailer is checked below.
        do = isal_zlib.decompressobj(wbits=-isal_zlib.MAX_WBITS)
        # Read all the data except the header
        decompressed = do.decompress(data[header_end:])
        # Everything after the end of the deflate stream: the trailer of
        # this member followed by any further members.
        trailer = do.unused_data
        if not do.eof or len(trailer) < 8:
            raise EOFError("Compressed file ended before the end-of-stream "
                           "marker was reached")
        crc, length = struct.unpack("<II", trailer[:8])
        if crc != isal_zlib.crc32(decompressed):
            raise BadGzipFile("CRC check failed")
        # The stored length is a 32-bit field, so compare modulo 2**32 to
        # keep members of 4 GiB and larger valid.
        if length != (len(decompressed) & 0xffffffff):
            raise BadGzipFile("Incorrect length of data produced")
        decompressed_members.append(decompressed)
        # Remove all padding null bytes and start the next member.
        data = trailer[8:].lstrip(b"\x00")
    return b"".join(decompressed_members)
404
405
405
406
406
407
def _argument_parser ():
0 commit comments