@@ -10,6 +10,7 @@ import contextlib
1010import datetime
1111import gzip
1212import hashlib
13+ import io
1314import json
1415import lzma
1516import multiprocessing
@@ -317,21 +318,41 @@ def gpg_verify_path(path: pathlib.Path, public_key_data: bytes, signature_data:
317318 subprocess .run (["gpgconf" , "--kill" , "gpg-agent" ], env = env )
318319
319320
def open_tar_stream(path: pathlib.Path):
    """Open a (possibly compressed) tar file as a binary read stream.

    The decompressor is selected purely from the file name suffix.

    Args:
        path: location of the tar archive.

    Returns:
        A binary file-like object yielding the uncompressed tar bytes.

    Raises:
        ValueError: the suffix is not one of the known tar variants.
    """
    suffix = path.suffix

    # These two variants work on a handle opened from the path itself.
    if suffix == ".zst":
        return ZstdDecompressor().stream_reader(path.open("rb"))
    if suffix == ".tar":
        return path.open("rb")

    # The remaining formats share the stdlib ``open(filename, mode)`` shape.
    openers = {
        ".bz2": bz2.open,
        ".gz": gzip.open,
        ".tgz": gzip.open,
        ".xz": lzma.open,
    }
    opener = openers.get(suffix)
    if opener is None:
        raise ValueError("unknown archive format for tar file: %s" % path)
    return opener(str(path), "rb")
class ArchiveTypeNotSupported(Exception):
    """Raised when a file's content is not a recognized archive format."""

    def __init__(self, path: pathlib.Path):
        # Zero-argument super() runs Exception.__init__ itself;
        # super(Exception, self) would start the lookup *after* Exception
        # in the MRO and skip it.
        super().__init__("Archive type not supported for %s" % path)
324+
325+
def open_stream(path: pathlib.Path):
    """Identify an archive from its content and open it for reading.

    The compression wrapper (gzip, bzip2, xz or zstandard) is chosen from the
    leading magic bytes of the file rather than from its name; the resulting
    stream is then checked for a tar header.

    Args:
        path: file to inspect.

    Returns:
        A ``(typ, fh)`` tuple where ``typ`` is ``"zip"`` or ``"tar"``. For
        ``"zip"``, ``fh`` is the binary file object rewound to offset 0. For
        ``"tar"``, ``fh`` is a buffered reader over the (decompressed) tar
        stream with nothing consumed yet.

    Raises:
        ArchiveTypeNotSupported: the content is neither a zip nor a
            (possibly compressed) tar archive. The file is closed before
            the exception propagates.
    """
    raw = path.open(mode="rb")
    stream = raw
    try:
        magic = raw.read(6)
        raw.seek(0)
        if magic[:2] == b"PK":
            return "zip", raw
        if magic[:2] == b"\x1f\x8b":
            stream = gzip.GzipFile(fileobj=raw)
        elif magic[:3] == b"BZh":
            stream = bz2.BZ2File(raw)
        elif magic == b"\xfd7zXZ\x00":
            stream = lzma.LZMAFile(raw)
        elif magic[:4] == b"\x28\xb5\x2f\xfd":
            stream = ZstdDecompressor().stream_reader(raw)
        # Buffer the stream so the tar header can be inspected with peek()
        # without consuming it.
        stream = io.BufferedReader(stream)
        try:
            # A full tar info header is 512 bytes. peek() performs at most
            # one read on the underlying stream and may return fewer bytes;
            # a short header fails the checks below.
            headers = stream.peek(512)
            # 257 is the offset of the ustar magic.
            magic = headers[257 : 257 + 8]
            # For older unix tar, rely on TarInfo.frombuf's checksum check.
            if magic in (b"ustar\x0000", b"ustar  \x00") or tarfile.TarInfo.frombuf(
                headers[:512], tarfile.ENCODING, "surrogateescape"
            ):
                return "tar", stream
        except Exception:
            # Deliberately broad: each decompressor and the tar header
            # parser raise their own error types on foreign content, and
            # all of them mean "not an archive we can handle".
            pass
        raise ArchiveTypeNotSupported(path)
    except BaseException:
        # Do not leak the handle when no stream is handed to the caller.
        # The stdlib decompressor wrappers do not close a file object that
        # was passed to them, so close the raw file explicitly as well.
        try:
            stream.close()
        finally:
            raw.close()
        raise
335356
336357
337358def archive_type (path : pathlib .Path ):
@@ -344,7 +365,7 @@ def archive_type(path: pathlib.Path):
344365 return None
345366
346367
347- def extract_archive (path , dest_dir , typ ):
368+ def extract_archive (path , dest_dir ):
348369 """Extract an archive to a destination directory."""
349370
350371 # Resolve paths to absolute variants.
@@ -356,8 +377,8 @@ def extract_archive(path, dest_dir, typ):
356377
357378 # We pipe input to the decompressor program so that we can apply
358379 # custom decompressors that the program may not know about.
380+ typ , ifh = open_stream (path )
359381 if typ == "tar" :
360- ifh = open_tar_stream (path )
361382 # On Windows, the tar program doesn't support things like symbolic
362383 # links, while Windows actually support them. The tarfile module in
363384 # python does. So use that. But since it's significantly slower than
@@ -404,10 +425,8 @@ def repack_archive(
404425):
405426 assert orig != dest
406427 log ("Repacking as %s" % dest )
407- orig_typ = archive_type (orig )
428+ orig_typ , ifh = open_stream (orig )
408429 typ = archive_type (dest )
409- if not orig_typ :
410- raise Exception ("Archive type not supported for %s" % orig .name )
411430 if not typ :
412431 raise Exception ("Archive type not supported for %s" % dest .name )
413432
@@ -433,7 +452,7 @@ def repack_archive(
433452 ctx = ZstdCompressor ()
434453 if orig_typ == "zip" :
435454 assert typ == "tar"
436- zip = zipfile .ZipFile (orig )
455+ zip = zipfile .ZipFile (ifh )
437456 # Convert the zip stream to a tar on the fly.
438457 with ctx .stream_writer (fh ) as compressor , tarfile .open (
439458 fileobj = compressor , mode = "w:"
@@ -475,7 +494,6 @@ def repack_archive(
475494 raise Exception ("Repacking a tar to zip is not supported" )
476495 assert typ == "tar"
477496
478- ifh = open_tar_stream (orig )
479497 if filter :
480498 # To apply the filter, we need to open the tar stream and
481499 # tweak it.
@@ -518,11 +536,12 @@ def fetch_and_extract(url, dest_dir, extract=True, sha256=None, size=None):
518536 if not extract :
519537 return
520538
521- typ = archive_type (dest_path )
522- if typ :
523- extract_archive (dest_path , dest_dir , typ )
539+ try :
540+ extract_archive (dest_path , dest_dir )
524541 log ("Removing %s" % dest_path )
525542 dest_path .unlink ()
543+ except ArchiveTypeNotSupported :
544+ pass
526545
527546
528547def fetch_urls (downloads ):
0 commit comments